Sfoglia il codice sorgente

Adds support for proto2 syntax to the Ruby gem.

This change only adds basic proto2 support without advanced features
like extensions, custom options, maps, etc.

The protoc binary now generates ruby code for proto2 syntax.
However, for now this is restricted to proto2 files that do not use advanced
features such as extensions; files that do use them still make protoc error out.

This change also modifies the DSL to add proto messages to the DescriptorPool.
There is a new DSL Builder#add_file to create a new FileDescriptor. With this,
the generated ruby DSL looks something like:

Google::Protobuf::DescriptorPool.generated_pool.build do
  add_file "test.proto" do
    add_message "foo" do
      optional :val, :int32, 1
    end
  end
end
Harshit Chopra 8 anni fa
parent
commit
d0535cc09e

+ 5 - 0
.gitignore

@@ -171,9 +171,14 @@ js/testproto_libs2.js
 
 # ruby test output
 ruby/lib/
+ruby/tests/basic_test_pb.rb
+ruby/tests/basic_test_proto2_pb.rb
 ruby/tests/generated_code_pb.rb
 ruby/tests/test_import_pb.rb
 ruby/tests/test_ruby_package_pb.rb
+ruby/tests/generated_code_proto2_pb.rb
+ruby/tests/test_import_proto2_pb.rb
+ruby/tests/test_ruby_package_proto2_pb.rb
 ruby/Gemfile.lock
 ruby/compatibility_tests/v3.0.0/protoc
 ruby/compatibility_tests/v3.0.0/tests/generated_code_pb.rb

+ 26 - 1
ruby/Rakefile

@@ -86,20 +86,45 @@ end
 
 # Proto for tests.
 genproto_output << "tests/generated_code.rb"
+genproto_output << "tests/generated_code_proto2.rb"
 genproto_output << "tests/test_import.rb"
+genproto_output << "tests/test_import_proto2.rb"
 genproto_output << "tests/test_ruby_package.rb"
+genproto_output << "tests/test_ruby_package_proto2.rb"
+genproto_output << "tests/basic_test.rb"
+genproto_output << "tests/basic_test_proto2.rb"
 file "tests/generated_code.rb" => "tests/generated_code.proto" do |file_task|
   sh "../src/protoc --ruby_out=. tests/generated_code.proto"
 end
 
+file "tests/generated_code_proto2.rb" => "tests/generated_code_proto2.proto" do |file_task|
+  sh "../src/protoc --ruby_out=. tests/generated_code_proto2.proto"
+end
+
 file "tests/test_import.rb" => "tests/test_import.proto" do |file_task|
   sh "../src/protoc --ruby_out=. tests/test_import.proto"
 end
 
+file "tests/test_import_proto2.rb" => "tests/test_import_proto2.proto" do |file_task|
+  sh "../src/protoc --ruby_out=. tests/test_import_proto2.proto"
+end
+
 file "tests/test_ruby_package.rb" => "tests/test_ruby_package.proto" do |file_task|
   sh "../src/protoc --ruby_out=. tests/test_ruby_package.proto"
 end
 
+file "tests/test_ruby_package_proto2.rb" => "tests/test_ruby_package_proto2.proto" do |file_task|
+  sh "../src/protoc --ruby_out=. tests/test_ruby_package_proto2.proto"
+end
+
+file "tests/basic_test.rb" => "tests/basic_test.proto" do |file_task|
+  sh "../src/protoc --ruby_out=. tests/basic_test.proto"
+end
+
+file "tests/basic_test_proto2.rb" => "tests/basic_test_proto2.proto" do |file_task|
+  sh "../src/protoc --ruby_out=. tests/basic_test_proto2.proto"
+end
+
 task :genproto => genproto_output
 
 task :clean do
@@ -110,7 +135,7 @@ Gem::PackageTask.new(spec) do |pkg|
 end
 
 Rake::TestTask.new(:test => :build) do |t|
-  t.test_files = FileList["tests/*.rb"].exclude("tests/gc_test.rb")
+  t.test_files = FileList["tests/*.rb"].exclude("tests/gc_test.rb", "tests/common_tests.rb")
 end
 
 # gc_test needs to be split out to ensure the generated file hasn't been

+ 549 - 66
ruby/ext/google/protobuf_c/defs.c

@@ -122,7 +122,7 @@ void DescriptorPool_register(VALUE module) {
       module, "DescriptorPool", rb_cObject);
   rb_define_alloc_func(klass, DescriptorPool_alloc);
   rb_define_method(klass, "add", DescriptorPool_add, 1);
-  rb_define_method(klass, "build", DescriptorPool_build, 0);
+  rb_define_method(klass, "build", DescriptorPool_build, -1);
   rb_define_method(klass, "lookup", DescriptorPool_lookup, 1);
   rb_define_singleton_method(klass, "generated_pool",
                              DescriptorPool_generated_pool, 0);
@@ -181,7 +181,7 @@ VALUE DescriptorPool_add(VALUE _self, VALUE def) {
  * Builder#add_enum within the block as appropriate.  This is the recommended,
  * idiomatic way to define new message and enum types.
  */
-VALUE DescriptorPool_build(VALUE _self) {
+VALUE DescriptorPool_build(int argc, VALUE* argv, VALUE _self) {
   VALUE ctx = rb_class_new_instance(0, NULL, cBuilder);
   VALUE block = rb_block_proc();
   rb_funcall_with_block(ctx, rb_intern("instance_eval"), 0, NULL, block);
@@ -289,6 +289,7 @@ void Descriptor_register(VALUE module) {
   VALUE klass = rb_define_class_under(
       module, "Descriptor", rb_cObject);
   rb_define_alloc_func(klass, Descriptor_alloc);
+  rb_define_method(klass, "initialize", Descriptor_initialize, 1);
   rb_define_method(klass, "each", Descriptor_each, 0);
   rb_define_method(klass, "lookup", Descriptor_lookup, 1);
   rb_define_method(klass, "add_field", Descriptor_add_field, 1);
@@ -298,11 +299,42 @@ void Descriptor_register(VALUE module) {
   rb_define_method(klass, "msgclass", Descriptor_msgclass, 0);
   rb_define_method(klass, "name", Descriptor_name, 0);
   rb_define_method(klass, "name=", Descriptor_name_set, 1);
+  rb_define_method(klass, "file_descriptor", Descriptor_file_descriptor, 0);
   rb_include_module(klass, rb_mEnumerable);
   rb_gc_register_address(&cDescriptor);
   cDescriptor = klass;
 }
 
+/*
+ * call-seq:
+ *    Descriptor.new(file_descriptor)
+ *
+ * Initializes a new descriptor and assigns a file descriptor to it.
+ *
+ * The descriptor's msgdef is added to the file descriptor's filedef with
+ * upb_filedef_addmsg, and the filedef is then registered against its Ruby
+ * wrapper with add_def_obj (presumably so the filedef can later be mapped
+ * back to the Ruby FileDescriptor object -- confirm against add_def_obj).
+ * A failure to add the message is reported through CHECK_UPB with the
+ * message "Failed to associate message to file descriptor."
+ *
+ * Always returns nil.
+ */
+VALUE Descriptor_initialize(VALUE _self, VALUE file_descriptor_rb) {
+  DEFINE_SELF(Descriptor, self, _self);
+
+  FileDescriptor* file_descriptor = ruby_to_FileDescriptor(file_descriptor_rb);
+
+  CHECK_UPB(
+        upb_filedef_addmsg(file_descriptor->filedef, self->msgdef, NULL, &status),
+        "Failed to associate message to file descriptor.");
+  add_def_obj(file_descriptor->filedef, file_descriptor_rb);
+
+  return Qnil;
+}
+
+/*
+ * call-seq:
+ *    Descriptor.file_descriptor => file_descriptor
+ *
+ * Returns the FileDescriptor object this message belongs to. The object is
+ * obtained by passing the msgdef's file (upb_def_file) to get_def_obj.
+ */
+VALUE Descriptor_file_descriptor(VALUE _self) {
+  DEFINE_SELF(Descriptor, self, _self);
+  return get_def_obj(upb_def_file(self->msgdef));
+}
+
 /*
  * call-seq:
  *     Descriptor.name => name
@@ -470,6 +502,142 @@ VALUE Descriptor_msgclass(VALUE _self) {
   return self->klass;
 }
 
+// -----------------------------------------------------------------------------
+// FileDescriptor.
+// -----------------------------------------------------------------------------
+
+DEFINE_CLASS(FileDescriptor, "Google::Protobuf::FileDescriptor");
+
+/*
+ * GC mark hook for FileDescriptor. Intentionally empty: nothing is marked
+ * here (presumably because the struct holds no Ruby VALUEs -- confirm
+ * against the FileDescriptor struct declaration).
+ */
+void FileDescriptor_mark(void* _self) {
+}
+
+/*
+ * GC free hook for FileDescriptor. Drops the reference on the wrapped
+ * filedef, using &self->filedef as the owner (the same owner pointer that
+ * FileDescriptor_alloc passes to upb_filedef_new), then frees the struct.
+ */
+void FileDescriptor_free(void* _self) {
+  FileDescriptor* self = _self;
+  upb_filedef_unref(self->filedef, &self->filedef);
+  xfree(self);
+}
+
+/*
+ * Allocator for FileDescriptor (registered with rb_define_alloc_func; the
+ * Ruby-visible constructor arguments are handled by
+ * FileDescriptor_initialize).
+ *
+ * Allocates the FileDescriptor struct, wraps it in a Ruby object of the
+ * given class and creates a fresh upb filedef for it. The filedef is created
+ * with &self->filedef as its owner.
+ *
+ * Returns the new, still unnamed file descriptor. The syntax must be set
+ * before it's passed to a builder.
+ */
+VALUE FileDescriptor_alloc(VALUE klass) {
+  FileDescriptor* self = ALLOC(FileDescriptor);
+  VALUE ret = TypedData_Wrap_Struct(klass, &_FileDescriptor_type, self);
+  upb_filedef* filedef = upb_filedef_new(&self->filedef);
+  self->filedef = filedef;
+  return ret;
+}
+
+/*
+ * Defines the FileDescriptor class under the given module, installs its
+ * allocator and the Ruby methods initialize (variable arity), name, syntax
+ * and syntax=, and stores the class in cFileDescriptor, whose address is
+ * registered with the GC.
+ */
+void FileDescriptor_register(VALUE module) {
+  VALUE klass = rb_define_class_under(
+      module, "FileDescriptor", rb_cObject);
+  rb_define_alloc_func(klass, FileDescriptor_alloc);
+  rb_define_method(klass, "initialize", FileDescriptor_initialize, -1);
+  rb_define_method(klass, "name", FileDescriptor_name, 0);
+  rb_define_method(klass, "syntax", FileDescriptor_syntax, 0);
+  rb_define_method(klass, "syntax=", FileDescriptor_syntax_set, 1);
+  cFileDescriptor = klass;
+  rb_gc_register_address(&cFileDescriptor);
+}
+
+/*
+ * call-seq:
+ *     FileDescriptor.new(name, options = nil) => file
+ *
+ * Initializes a new file descriptor with the given file name.
+ * Also accepts an optional "options" hash, specifying other optional
+ * metadata about the file. The options hash currently accepts the following
+ * key (looked up as a Symbol):
+ *   * :syntax => :proto2 or :proto3 (default: :proto3)
+ *
+ * name may be nil, in which case no file name is set on the underlying
+ * filedef; otherwise it must be a String. options, when given, must be a
+ * Hash. The selected syntax is applied through FileDescriptor_syntax_set, so
+ * an unsupported syntax value raises from there.
+ *
+ * Always returns nil.
+ */
+VALUE FileDescriptor_initialize(int argc, VALUE* argv, VALUE _self) {
+  DEFINE_SELF(FileDescriptor, self, _self);
+
+  VALUE name_rb;
+  VALUE options = Qnil;
+  rb_scan_args(argc, argv, "11", &name_rb, &options);
+
+  if (name_rb != Qnil) {
+    Check_Type(name_rb, T_STRING);
+    const char* name = get_str(name_rb);
+    CHECK_UPB(upb_filedef_setname(self->filedef, name, &status),
+	      "Error setting file name");
+  }
+
+  // Default syntax is proto3.
+  VALUE syntax = ID2SYM(rb_intern("proto3"));
+  if (options != Qnil) {
+    Check_Type(options, T_HASH);
+
+    if (rb_funcall(options, rb_intern("key?"), 1,
+		   ID2SYM(rb_intern("syntax"))) == Qtrue) {
+      syntax = rb_hash_lookup(options, ID2SYM(rb_intern("syntax")));
+    }
+  }
+  FileDescriptor_syntax_set(_self, syntax);
+
+  return Qnil;
+}
+
+/*
+ * call-seq:
+ *     FileDescriptor.name => name
+ *
+ * Returns the name of the file as a new String, or nil when the underlying
+ * filedef has no name.
+ */
+VALUE FileDescriptor_name(VALUE _self) {
+  DEFINE_SELF(FileDescriptor, self, _self);
+  const char* name = upb_filedef_name(self->filedef);
+  return name == NULL ? Qnil : rb_str_new2(name);
+}
+
+/*
+ * call-seq:
+ *     FileDescriptor.syntax => syntax
+ *
+ * Returns this file descriptor's syntax.
+ *
+ * Valid syntax versions are:
+ *     :proto2 or :proto3.
+ *
+ * Returns nil if the underlying filedef reports any other syntax value.
+ */
+VALUE FileDescriptor_syntax(VALUE _self) {
+  DEFINE_SELF(FileDescriptor, self, _self);
+
+  switch (upb_filedef_syntax(self->filedef)) {
+    case UPB_SYNTAX_PROTO3: return ID2SYM(rb_intern("proto3"));
+    case UPB_SYNTAX_PROTO2: return ID2SYM(rb_intern("proto2"));
+    default: return Qnil;
+  }
+}
+
+/*
+ * call-seq:
+ *     FileDescriptor.syntax = version
+ *
+ * Sets this file descriptor's syntax, can be :proto3 or :proto2.
+ *
+ * Raises TypeError (from Check_Type) if version is not a Symbol, and
+ * ArgumentError if it is a Symbol other than :proto2 or :proto3. A failure
+ * reported by upb while storing the syntax is surfaced through CHECK_UPB.
+ *
+ * Always returns nil.
+ */
+VALUE FileDescriptor_syntax_set(VALUE _self, VALUE syntax_rb) {
+  DEFINE_SELF(FileDescriptor, self, _self);
+  Check_Type(syntax_rb, T_SYMBOL);
+
+  upb_syntax_t syntax;
+  if (SYM2ID(syntax_rb) == rb_intern("proto3")) {
+    syntax = UPB_SYNTAX_PROTO3;
+  } else if (SYM2ID(syntax_rb) == rb_intern("proto2")) {
+    syntax = UPB_SYNTAX_PROTO2;
+  } else {
+    // The message must name both accepted values; it previously listed
+    // :proto3 twice.
+    rb_raise(rb_eArgError, "Expected :proto2 or :proto3, received '%s'",
+             rb_id2name(SYM2ID(syntax_rb)));
+  }
+
+  CHECK_UPB(upb_filedef_setsyntax(self->filedef, syntax, &status),
+          "Error setting file syntax for proto");
+  return Qnil;
+}
+
 // -----------------------------------------------------------------------------
 // FieldDescriptor.
 // -----------------------------------------------------------------------------
@@ -509,6 +677,8 @@ void FieldDescriptor_register(VALUE module) {
   rb_define_method(klass, "name=", FieldDescriptor_name_set, 1);
   rb_define_method(klass, "type", FieldDescriptor_type, 0);
   rb_define_method(klass, "type=", FieldDescriptor_type_set, 1);
+  rb_define_method(klass, "default", FieldDescriptor_default, 0);
+  rb_define_method(klass, "default=", FieldDescriptor_default_set, 1);
   rb_define_method(klass, "label", FieldDescriptor_label, 0);
   rb_define_method(klass, "label=", FieldDescriptor_label_set, 1);
   rb_define_method(klass, "number", FieldDescriptor_number, 0);
@@ -516,6 +686,8 @@ void FieldDescriptor_register(VALUE module) {
   rb_define_method(klass, "submsg_name", FieldDescriptor_submsg_name, 0);
   rb_define_method(klass, "submsg_name=", FieldDescriptor_submsg_name_set, 1);
   rb_define_method(klass, "subtype", FieldDescriptor_subtype, 0);
+  rb_define_method(klass, "has?", FieldDescriptor_has, 1);
+  rb_define_method(klass, "clear", FieldDescriptor_clear, 1);
   rb_define_method(klass, "get", FieldDescriptor_get, 1);
   rb_define_method(klass, "set", FieldDescriptor_set, 2);
   rb_gc_register_address(&cFieldDescriptor);
@@ -691,6 +863,71 @@ VALUE FieldDescriptor_type_set(VALUE _self, VALUE type) {
   return Qnil;
 }
 
+/*
+ * call-seq:
+ *     FieldDescriptor.default => default
+ *
+ * Returns this field's default, as a Ruby object, or nil if not yet set.
+ * The value is produced by layout_get_default for the wrapped fielddef.
+ */
+VALUE FieldDescriptor_default(VALUE _self) {
+  DEFINE_SELF(FieldDescriptor, self, _self);
+  return layout_get_default(self->fielddef);
+}
+
+/*
+ * call-seq:
+ *     FieldDescriptor.default = default
+ *
+ * Sets this field's default value. The Ruby value is converted according to
+ * the field's type:
+ *   * float / double: a Numeric (converted with NUM2DBL)
+ *   * bool: true, false or nil (nil is stored as false)
+ *   * int32 / enum, int64, uint32, uint64: an Integer in range for the type
+ *   * string / bytes: a String
+ * Any other field type raises ArgumentError.
+ *
+ * NOTE(review): this setter does not itself check the file syntax; the
+ * "no defaults in proto3" rule is enforced by the builder DSL in
+ * msgdef_add_field before it calls this method.
+ *
+ * Always returns nil.
+ */
+VALUE FieldDescriptor_default_set(VALUE _self, VALUE default_value) {
+  DEFINE_SELF(FieldDescriptor, self, _self);
+  upb_fielddef* mut_def = check_field_notfrozen(self->fielddef);
+
+  switch (upb_fielddef_type(mut_def)) {
+    case UPB_TYPE_FLOAT:
+      upb_fielddef_setdefaultfloat(mut_def, NUM2DBL(default_value));
+      break;
+    case UPB_TYPE_DOUBLE:
+      upb_fielddef_setdefaultdouble(mut_def, NUM2DBL(default_value));
+      break;
+    case UPB_TYPE_BOOL:
+      if (!RB_TYPE_P(default_value, T_TRUE) &&
+          !RB_TYPE_P(default_value, T_FALSE) &&
+          !RB_TYPE_P(default_value, T_NIL)) {
+        rb_raise(cTypeError, "Expected boolean for default value.");
+      }
+
+      upb_fielddef_setdefaultbool(mut_def, RTEST(default_value));
+      break;
+    case UPB_TYPE_ENUM:
+    case UPB_TYPE_INT32:
+      upb_fielddef_setdefaultint32(mut_def, NUM2INT(default_value));
+      break;
+    case UPB_TYPE_INT64:
+      // Use the 64-bit conversion: NUM2INT would reject any default that
+      // does not fit in a C int.
+      upb_fielddef_setdefaultint64(mut_def, NUM2LL(default_value));
+      break;
+    case UPB_TYPE_UINT32:
+      upb_fielddef_setdefaultuint32(mut_def, NUM2UINT(default_value));
+      break;
+    case UPB_TYPE_UINT64:
+      // Likewise, NUM2UINT cannot represent the full uint64 range.
+      upb_fielddef_setdefaultuint64(mut_def, NUM2ULL(default_value));
+      break;
+    case UPB_TYPE_STRING:
+    case UPB_TYPE_BYTES:
+      CHECK_UPB(upb_fielddef_setdefaultcstr(mut_def,
+                                            StringValuePtr(default_value),
+                                            &status),
+                "Error setting default string");
+      break;
+    default:
+      rb_raise(rb_eArgError, "Defaults not supported on field %s.%s",
+               upb_fielddef_fullname(mut_def), upb_fielddef_name(mut_def));
+  }
+
+  return Qnil;
+}
+
 /*
  * call-seq:
  *     FieldDescriptor.label => label
@@ -859,6 +1096,44 @@ VALUE FieldDescriptor_get(VALUE _self, VALUE msg_rb) {
   return layout_get(msg->descriptor->layout, Message_data(msg), self->fielddef);
 }
 
+/*
+ * call-seq:
+ *     FieldDescriptor.has?(message) => boolean
+ *
+ * Returns whether the value is set on the given message, as computed by
+ * layout_has.
+ *
+ * Raises a type error (cTypeError) if the message's type is not the type
+ * that contains this field, and ArgumentError if the field does not track
+ * presence according to upb_fielddef_haspresence (presumably the case for
+ * proto3 scalar fields -- confirm against upb).
+ */
+VALUE FieldDescriptor_has(VALUE _self, VALUE msg_rb) {
+  DEFINE_SELF(FieldDescriptor, self, _self);
+  MessageHeader* msg;
+  TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);
+  if (msg->descriptor->msgdef != upb_fielddef_containingtype(self->fielddef)) {
+    rb_raise(cTypeError, "has method called on wrong message type");
+  } else if (!upb_fielddef_haspresence(self->fielddef)) {
+    rb_raise(rb_eArgError, "does not track presence");
+  }
+
+  return layout_has(msg->descriptor->layout, Message_data(msg), self->fielddef);
+}
+
+/*
+ * call-seq:
+ *     FieldDescriptor.clear(message)
+ *
+ * Clears the field from the message if it's set, by delegating to
+ * layout_clear.
+ *
+ * Raises a type error (cTypeError) if the message's type is not the type
+ * that contains this field. Always returns nil.
+ */
+VALUE FieldDescriptor_clear(VALUE _self, VALUE msg_rb) {
+  DEFINE_SELF(FieldDescriptor, self, _self);
+  MessageHeader* msg;
+  TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);
+  if (msg->descriptor->msgdef != upb_fielddef_containingtype(self->fielddef)) {
+    // Name the method that was actually called; the message used to say
+    // "has method", copied from FieldDescriptor_has.
+    rb_raise(cTypeError, "clear method called on wrong message type");
+  }
+
+  layout_clear(msg->descriptor->layout, Message_data(msg), self->fielddef);
+  return Qnil;
+}
+
 /*
  * call-seq:
  *     FieldDescriptor.set(message, value)
@@ -1029,6 +1304,7 @@ void EnumDescriptor_register(VALUE module) {
   VALUE klass = rb_define_class_under(
       module, "EnumDescriptor", rb_cObject);
   rb_define_alloc_func(klass, EnumDescriptor_alloc);
+  rb_define_method(klass, "initialize", EnumDescriptor_initialize, 1);
   rb_define_method(klass, "name", EnumDescriptor_name, 0);
   rb_define_method(klass, "name=", EnumDescriptor_name_set, 1);
   rb_define_method(klass, "add_value", EnumDescriptor_add_value, 2);
@@ -1036,11 +1312,41 @@ void EnumDescriptor_register(VALUE module) {
   rb_define_method(klass, "lookup_value", EnumDescriptor_lookup_value, 1);
   rb_define_method(klass, "each", EnumDescriptor_each, 0);
   rb_define_method(klass, "enummodule", EnumDescriptor_enummodule, 0);
+  rb_define_method(klass, "file_descriptor", EnumDescriptor_file_descriptor, 0);
   rb_include_module(klass, rb_mEnumerable);
   rb_gc_register_address(&cEnumDescriptor);
   cEnumDescriptor = klass;
 }
 
+/*
+ * call-seq:
+ *    EnumDescriptor.new(file_descriptor)
+ *
+ * Initializes a new enum descriptor and assigns a file descriptor to it.
+ *
+ * The descriptor's enumdef is added to the file descriptor's filedef with
+ * upb_filedef_addenum, and the filedef is then registered against its Ruby
+ * wrapper with add_def_obj. A failure to add the enum is reported through
+ * CHECK_UPB with the message "Failed to associate enum to file descriptor."
+ *
+ * Always returns nil.
+ */
+VALUE EnumDescriptor_initialize(VALUE _self, VALUE file_descriptor_rb) {
+  DEFINE_SELF(EnumDescriptor, self, _self);
+  FileDescriptor* file_descriptor = ruby_to_FileDescriptor(file_descriptor_rb);
+  CHECK_UPB(
+        upb_filedef_addenum(file_descriptor->filedef, self->enumdef,
+			    NULL, &status),
+        "Failed to associate enum to file descriptor.");
+  add_def_obj(file_descriptor->filedef, file_descriptor_rb);
+
+  return Qnil;
+}
+
+/*
+ * call-seq:
+ *    EnumDescriptor.file_descriptor => file_descriptor
+ *
+ * Returns the FileDescriptor object this enum belongs to. The object is
+ * obtained by passing the enumdef's file (upb_def_file) to get_def_obj.
+ */
+VALUE EnumDescriptor_file_descriptor(VALUE _self) {
+  DEFINE_SELF(EnumDescriptor, self, _self);
+  return get_def_obj(upb_def_file(self->enumdef));
+}
+
 /*
  * call-seq:
  *     EnumDescriptor.name => name
@@ -1223,34 +1529,56 @@ VALUE MessageBuilderContext_initialize(VALUE _self,
   return Qnil;
 }
 
-static VALUE msgdef_add_field(VALUE msgdef,
+static VALUE msgdef_add_field(VALUE msgdef_rb,
                               const char* label, VALUE name,
                               VALUE type, VALUE number,
-                              VALUE type_class) {
-  VALUE fielddef = rb_class_new_instance(0, NULL, cFieldDescriptor);
+                              VALUE type_class,
+                              VALUE options) {
+  VALUE fielddef_rb = rb_class_new_instance(0, NULL, cFieldDescriptor);
   VALUE name_str = rb_str_new2(rb_id2name(SYM2ID(name)));
 
-  rb_funcall(fielddef, rb_intern("label="), 1, ID2SYM(rb_intern(label)));
-  rb_funcall(fielddef, rb_intern("name="), 1, name_str);
-  rb_funcall(fielddef, rb_intern("type="), 1, type);
-  rb_funcall(fielddef, rb_intern("number="), 1, number);
+  rb_funcall(fielddef_rb, rb_intern("label="), 1, ID2SYM(rb_intern(label)));
+  rb_funcall(fielddef_rb, rb_intern("name="), 1, name_str);
+  rb_funcall(fielddef_rb, rb_intern("type="), 1, type);
+  rb_funcall(fielddef_rb, rb_intern("number="), 1, number);
 
   if (type_class != Qnil) {
-    if (TYPE(type_class) != T_STRING) {
-      rb_raise(rb_eArgError, "Expected string for type class");
-    }
+    Check_Type(type_class, T_STRING);
+
     // Make it an absolute type name by prepending a dot.
     type_class = rb_str_append(rb_str_new2("."), type_class);
-    rb_funcall(fielddef, rb_intern("submsg_name="), 1, type_class);
+    rb_funcall(fielddef_rb, rb_intern("submsg_name="), 1, type_class);
   }
 
-  rb_funcall(msgdef, rb_intern("add_field"), 1, fielddef);
-  return fielddef;
+  if (options != Qnil) {
+    Check_Type(options, T_HASH);
+
+    if (rb_funcall(options, rb_intern("key?"), 1,
+		   ID2SYM(rb_intern("default"))) == Qtrue) {
+      Descriptor* msgdef = ruby_to_Descriptor(msgdef_rb);
+      if (upb_msgdef_syntax((upb_msgdef*)msgdef->msgdef) == UPB_SYNTAX_PROTO3) {
+        rb_raise(rb_eArgError, "Cannot set :default when using proto3 syntax.");
+      }
+
+      FieldDescriptor* fielddef = ruby_to_FieldDescriptor(fielddef_rb);
+      if (!upb_fielddef_haspresence((upb_fielddef*)fielddef->fielddef) ||
+	  upb_fielddef_issubmsg((upb_fielddef*)fielddef->fielddef)) {
+        rb_raise(rb_eArgError, "Cannot set :default on this kind of field.");
+      }
+
+      rb_funcall(fielddef_rb, rb_intern("default="), 1,
+		 rb_hash_lookup(options, ID2SYM(rb_intern("default"))));
+    }
+  }
+
+  rb_funcall(msgdef_rb, rb_intern("add_field"), 1, fielddef_rb);
+  return fielddef_rb;
 }
 
 /*
  * call-seq:
- *     MessageBuilderContext.optional(name, type, number, type_class = nil)
+ *     MessageBuilderContext.optional(name, type, number, type_class = nil,
+ *                                    options = nil)
  *
  * Defines a new optional field on this message type with the given type, tag
  * number, and type class (for message and enum fields). The type must be a Ruby
@@ -1259,23 +1587,26 @@ static VALUE msgdef_add_field(VALUE msgdef,
  */
 VALUE MessageBuilderContext_optional(int argc, VALUE* argv, VALUE _self) {
   DEFINE_SELF(MessageBuilderContext, self, _self);
-  VALUE name, type, number, type_class;
+  VALUE name, type, number;
+  VALUE type_class, options = Qnil;
 
-  if (argc < 3) {
-    rb_raise(rb_eArgError, "Expected at least 3 arguments.");
+  rb_scan_args(argc, argv, "32", &name, &type, &number, &type_class, &options);
+
+  // Allow passing (name, type, number, options) or
+  // (name, type, number, type_class, options)
+  if (argc == 4 && RB_TYPE_P(type_class, T_HASH)) {
+    options = type_class;
+    type_class = Qnil;
   }
-  name = argv[0];
-  type = argv[1];
-  number = argv[2];
-  type_class = (argc > 3) ? argv[3] : Qnil;
 
   return msgdef_add_field(self->descriptor, "optional",
-                          name, type, number, type_class);
+                          name, type, number, type_class, options);
 }
 
 /*
  * call-seq:
- *     MessageBuilderContext.required(name, type, number, type_class = nil)
+ *     MessageBuilderContext.required(name, type, number, type_class = nil,
+ *                                    options = nil)
  *
  * Defines a new required field on this message type with the given type, tag
  * number, and type class (for message and enum fields). The type must be a Ruby
@@ -1288,18 +1619,20 @@ VALUE MessageBuilderContext_optional(int argc, VALUE* argv, VALUE _self) {
  */
 VALUE MessageBuilderContext_required(int argc, VALUE* argv, VALUE _self) {
   DEFINE_SELF(MessageBuilderContext, self, _self);
-  VALUE name, type, number, type_class;
+  VALUE name, type, number;
+  VALUE type_class, options = Qnil;
 
-  if (argc < 3) {
-    rb_raise(rb_eArgError, "Expected at least 3 arguments.");
+  rb_scan_args(argc, argv, "32", &name, &type, &number, &type_class, &options);
+
+  // Allow passing (name, type, number, options) or
+  // (name, type, number, type_class, options)
+  if (argc == 4 && RB_TYPE_P(type_class, T_HASH)) {
+    options = type_class;
+    type_class = Qnil;
   }
-  name = argv[0];
-  type = argv[1];
-  number = argv[2];
-  type_class = (argc > 3) ? argv[3] : Qnil;
 
   return msgdef_add_field(self->descriptor, "required",
-                          name, type, number, type_class);
+                          name, type, number, type_class, options);
 }
 
 /*
@@ -1324,7 +1657,7 @@ VALUE MessageBuilderContext_repeated(int argc, VALUE* argv, VALUE _self) {
   type_class = (argc > 3) ? argv[3] : Qnil;
 
   return msgdef_add_field(self->descriptor, "repeated",
-                          name, type, number, type_class);
+                          name, type, number, type_class, Qnil);
 }
 
 /*
@@ -1365,9 +1698,17 @@ VALUE MessageBuilderContext_map(int argc, VALUE* argv, VALUE _self) {
              "type.");
   }
 
+  Descriptor* descriptor = ruby_to_Descriptor(self->descriptor);
+  if (upb_msgdef_syntax(descriptor->msgdef) == UPB_SYNTAX_PROTO2) {
+    rb_raise(rb_eArgError,
+	     "Cannot add a native map field using proto2 syntax.");
+  }
+
   // Create a new message descriptor for the map entry message, and create a
   // repeated submessage field here with that type.
-  mapentry_desc = rb_class_new_instance(0, NULL, cDescriptor);
+  VALUE file_descriptor_rb =
+      rb_funcall(self->descriptor, rb_intern("file_descriptor"), 0);
+  mapentry_desc = rb_class_new_instance(1, &file_descriptor_rb, cDescriptor);
   mapentry_desc_name = rb_funcall(self->descriptor, rb_intern("name"), 0);
   mapentry_desc_name = rb_str_cat2(mapentry_desc_name, "_MapEntry_");
   mapentry_desc_name = rb_str_cat2(mapentry_desc_name,
@@ -1410,8 +1751,8 @@ VALUE MessageBuilderContext_map(int argc, VALUE* argv, VALUE _self) {
   {
     // Add the map-entry message type to the current builder, and use the type
     // to create the map field itself.
-    Builder* builder_self = ruby_to_Builder(self->builder);
-    rb_ary_push(builder_self->pending_list, mapentry_desc);
+    Builder* builder = ruby_to_Builder(self->builder);
+    rb_ary_push(builder->pending_list, mapentry_desc);
   }
 
   {
@@ -1514,7 +1855,8 @@ VALUE OneofBuilderContext_initialize(VALUE _self,
 
 /*
  * call-seq:
- *     OneofBuilderContext.optional(name, type, number, type_class = nil)
+ *     OneofBuilderContext.optional(name, type, number, type_class = nil,
+ *                                  options = nil)
  *
  * Defines a new optional field in this oneof with the given type, tag number,
  * and type class (for message and enum fields). The type must be a Ruby symbol
@@ -1523,18 +1865,13 @@ VALUE OneofBuilderContext_initialize(VALUE _self,
  */
 VALUE OneofBuilderContext_optional(int argc, VALUE* argv, VALUE _self) {
   DEFINE_SELF(OneofBuilderContext, self, _self);
-  VALUE name, type, number, type_class;
+  VALUE name, type, number;
+  VALUE type_class, options = Qnil;
 
-  if (argc < 3) {
-    rb_raise(rb_eArgError, "Expected at least 3 arguments.");
-  }
-  name = argv[0];
-  type = argv[1];
-  number = argv[2];
-  type_class = (argc > 3) ? argv[3] : Qnil;
+  rb_scan_args(argc, argv, "32", &name, &type, &number, &type_class, &options);
 
   return msgdef_add_field(self->descriptor, "optional",
-                          name, type, number, type_class);
+                          name, type, number, type_class, options);
 }
 
 // -----------------------------------------------------------------------------
@@ -1604,6 +1941,112 @@ VALUE EnumBuilderContext_value(VALUE _self, VALUE name, VALUE number) {
   return enumdef_add_value(self->enumdesc, name, number);
 }
 
+
+// -----------------------------------------------------------------------------
+// FileBuilderContext.
+// -----------------------------------------------------------------------------
+
+DEFINE_CLASS(FileBuilderContext,
+	     "Google::Protobuf::Internal::FileBuilderContext");
+
+/*
+ * GC mark hook for FileBuilderContext: marks the three Ruby VALUEs the
+ * context holds (pending_list, file_descriptor and builder).
+ */
+void FileBuilderContext_mark(void* _self) {
+  FileBuilderContext* self = _self;
+  rb_gc_mark(self->pending_list);
+  rb_gc_mark(self->file_descriptor);
+  rb_gc_mark(self->builder);
+}
+
+/*
+ * GC free hook for FileBuilderContext: releases only the struct itself. The
+ * VALUEs it references are not touched here.
+ */
+void FileBuilderContext_free(void* _self) {
+  FileBuilderContext* self = _self;
+  xfree(self);
+}
+
+/*
+ * Allocator for FileBuilderContext: allocates the struct, wraps it in a Ruby
+ * object of the given class and sets all three VALUE fields to nil. They are
+ * given real values by FileBuilderContext_initialize.
+ */
+VALUE FileBuilderContext_alloc(VALUE klass) {
+  FileBuilderContext* self = ALLOC(FileBuilderContext);
+  VALUE ret = TypedData_Wrap_Struct(klass, &_FileBuilderContext_type, self);
+  self->pending_list = Qnil;
+  self->file_descriptor = Qnil;
+  self->builder = Qnil;
+  return ret;
+}
+
+/*
+ * Defines the FileBuilderContext class under the given module, installs its
+ * allocator and the Ruby methods initialize (2 args), add_message (1 arg)
+ * and add_enum (1 arg), and stores the class in cFileBuilderContext.
+ */
+void FileBuilderContext_register(VALUE module) {
+  VALUE klass = rb_define_class_under(module, "FileBuilderContext", rb_cObject);
+  rb_define_alloc_func(klass, FileBuilderContext_alloc);
+  rb_define_method(klass, "initialize", FileBuilderContext_initialize, 2);
+  rb_define_method(klass, "add_message", FileBuilderContext_add_message, 1);
+  rb_define_method(klass, "add_enum", FileBuilderContext_add_enum, 1);
+  rb_gc_register_address(&cFileBuilderContext);
+  cFileBuilderContext = klass;
+}
+
+/*
+ * call-seq:
+ *     FileBuilderContext.new(file_descriptor, builder) => context
+ *
+ * Create a new file builder context for the given file descriptor and
+ * builder context. This class is intended to serve as a DSL context to be used
+ * with #instance_eval.
+ *
+ * Stores both arguments on the context and starts it with an empty pending
+ * list, to which add_message and add_enum append the descriptors they
+ * create. Always returns nil.
+ */
+VALUE FileBuilderContext_initialize(VALUE _self, VALUE file_descriptor,
+				    VALUE builder) {
+  DEFINE_SELF(FileBuilderContext, self, _self);
+  self->pending_list = rb_ary_new();
+  self->file_descriptor = file_descriptor;
+  self->builder = builder;
+  return Qnil;
+}
+
+/*
+ * call-seq:
+ *     FileBuilderContext.add_message(name, &block)
+ *
+ * Creates a new, empty descriptor with the given name, and invokes the block in
+ * the context of a MessageBuilderContext on that descriptor. The block can then
+ * call, e.g., MessageBuilderContext#optional and MessageBuilderContext#repeated
+ * methods to define the message fields.
+ *
+ * The descriptor is created against this context's file descriptor, and the
+ * MessageBuilderContext is given this context's builder. Once the block has
+ * run, the descriptor is appended to this context's pending list.
+ *
+ * This is the recommended, idiomatic way to build message definitions.
+ * Always returns nil.
+ */
+VALUE FileBuilderContext_add_message(VALUE _self, VALUE name) {
+  DEFINE_SELF(FileBuilderContext, self, _self);
+  VALUE msgdef = rb_class_new_instance(1, &self->file_descriptor, cDescriptor);
+  VALUE args[2] = { msgdef, self->builder };
+  VALUE ctx = rb_class_new_instance(2, args, cMessageBuilderContext);
+  VALUE block = rb_block_proc();
+  rb_funcall(msgdef, rb_intern("name="), 1, name);
+  rb_funcall_with_block(ctx, rb_intern("instance_eval"), 0, NULL, block);
+  rb_ary_push(self->pending_list, msgdef);
+  return Qnil;
+}
+
+/*
+ * call-seq:
+ *     FileBuilderContext.add_enum(name, &block)
+ *
+ * Creates a new, empty enum descriptor with the given name, and invokes the
+ * block in the context of an EnumBuilderContext on that descriptor. The block
+ * can then call the EnumBuilderContext value-defining method to define the
+ * enum values (NOTE(review): the C implementation is EnumBuilderContext_value;
+ * confirm the Ruby method name it is registered under).
+ *
+ * The enum descriptor is created against this context's file descriptor and,
+ * once the block has run, appended to this context's pending list.
+ *
+ * This is the recommended, idiomatic way to build enum definitions.
+ * Always returns nil.
+ */
+VALUE FileBuilderContext_add_enum(VALUE _self, VALUE name) {
+  DEFINE_SELF(FileBuilderContext, self, _self);
+  VALUE enumdef =
+      rb_class_new_instance(1, &self->file_descriptor, cEnumDescriptor);
+  VALUE ctx = rb_class_new_instance(1, &enumdef, cEnumBuilderContext);
+  VALUE block = rb_block_proc();
+  rb_funcall(enumdef, rb_intern("name="), 1, name);
+  rb_funcall_with_block(ctx, rb_intern("instance_eval"), 0, NULL, block);
+  rb_ary_push(self->pending_list, enumdef);
+  return Qnil;
+}
+
+/*
+ * Returns the array of descriptors accumulated by add_message and add_enum.
+ * This accessor is not registered as a Ruby method in
+ * FileBuilderContext_register; Builder_add_file calls it directly to merge
+ * the file's descriptors into the builder's own pending list.
+ */
+VALUE FileBuilderContext_pending_descriptors(VALUE _self) {
+  DEFINE_SELF(FileBuilderContext, self, _self);
+  return self->pending_list;
+}
+
 // -----------------------------------------------------------------------------
 // Builder.
 // -----------------------------------------------------------------------------
@@ -1613,6 +2056,7 @@ DEFINE_CLASS(Builder, "Google::Protobuf::Internal::Builder");
 void Builder_mark(void* _self) {
   Builder* self = _self;
   rb_gc_mark(self->pending_list);
+  rb_gc_mark(self->default_file_descriptor);
 }
 
 void Builder_free(void* _self) {
@@ -1635,15 +2079,17 @@ VALUE Builder_alloc(VALUE klass) {
       klass, &_Builder_type, self);
   self->pending_list = Qnil;
   self->defs = NULL;
+  self->default_file_descriptor = Qnil;
   return ret;
 }
 
 void Builder_register(VALUE module) {
   VALUE klass = rb_define_class_under(module, "Builder", rb_cObject);
-  rb_define_alloc_func(klass, Builder_alloc);
+  rb_define_alloc_func(klass, Builder_alloc); 
+  rb_define_method(klass, "initialize", Builder_initialize, 0);
+  rb_define_method(klass, "add_file", Builder_add_file, -1);
   rb_define_method(klass, "add_message", Builder_add_message, 1);
   rb_define_method(klass, "add_enum", Builder_add_enum, 1);
-  rb_define_method(klass, "initialize", Builder_initialize, 0);
   rb_define_method(klass, "finalize_to_pool", Builder_finalize_to_pool, 1);
   rb_gc_register_address(&cBuilder);
   cBuilder = klass;
@@ -1651,13 +2097,40 @@ void Builder_register(VALUE module) {
 
 /*
  * call-seq:
- *     Builder.new(d) => builder
+ *    Builder.new
  *
- * Create a new message builder.
+ * Initializes a new builder.
  */
 VALUE Builder_initialize(VALUE _self) {
   DEFINE_SELF(Builder, self, _self);
   self->pending_list = rb_ary_new();
+  VALUE file_name = Qnil;
+  self->default_file_descriptor =
+      rb_class_new_instance(1, &file_name, cFileDescriptor);
+  return Qnil;
+}
+
+/*
+ * call-seq:
+ *     Builder.add_file(name, options = nil, &block)
+ *
+ * Creates a new file descriptor with the given name and options and invokes
+ * the block in the context of a FileBuilderContext on that descriptor. The
+ * block can then call FileBuilderContext#add_message or
+ * FileBuilderContext#add_enum to define new messages or enums, respectively.
+ *
+ * All arguments are forwarded unchanged to FileDescriptor.new. After the
+ * block has run, the descriptors collected by the FileBuilderContext are
+ * appended to this builder's pending list.
+ *
+ * This is the recommended, idiomatic way to build file descriptors.
+ * Always returns nil.
+ */
+VALUE Builder_add_file(int argc, VALUE* argv, VALUE _self) {
+  DEFINE_SELF(Builder, self, _self);
+  VALUE file_descriptor = rb_class_new_instance(argc, argv, cFileDescriptor);
+  VALUE args[2] = { file_descriptor, _self };
+  VALUE ctx = rb_class_new_instance(2, args, cFileBuilderContext);
+  VALUE block = rb_block_proc();
+  rb_funcall_with_block(ctx, rb_intern("instance_eval"), 0, NULL, block);
+
+  rb_ary_concat(self->pending_list,
+		FileBuilderContext_pending_descriptors(ctx));
+  return Qnil;
+}
 
@@ -1665,16 +2138,17 @@ VALUE Builder_initialize(VALUE _self) {
  * call-seq:
  *     Builder.add_message(name, &block)
  *
- * Creates a new, empty descriptor with the given name, and invokes the block in
- * the context of a MessageBuilderContext on that descriptor. The block can then
- * call, e.g., MessageBuilderContext#optional and MessageBuilderContext#repeated
- * methods to define the message fields.
+ * Old and deprecated way to create a new descriptor.
+ * See FileBuilderContext.add_message for the recommended way.
  *
- * This is the recommended, idiomatic way to build message definitions.
+ * Exists for backwards compatibility to allow building descriptor pool for
+ * files generated by protoc which don't add messages within "add_file" block.
+ * Descriptors created this way get assigned to a default empty FileDescriptor.
  */
 VALUE Builder_add_message(VALUE _self, VALUE name) {
   DEFINE_SELF(Builder, self, _self);
-  VALUE msgdef = rb_class_new_instance(0, NULL, cDescriptor);
+  VALUE msgdef =
+      rb_class_new_instance(1, &self->default_file_descriptor, cDescriptor);
   VALUE args[2] = { msgdef, _self };
   VALUE ctx = rb_class_new_instance(2, args, cMessageBuilderContext);
   VALUE block = rb_block_proc();
@@ -1688,15 +2162,18 @@ VALUE Builder_add_message(VALUE _self, VALUE name) {
  * call-seq:
  *     Builder.add_enum(name, &block)
  *
- * Creates a new, empty enum descriptor with the given name, and invokes the
- * block in the context of an EnumBuilderContext on that descriptor. The block
- * can then call EnumBuilderContext#add_value to define the enum values.
+ * Old and deprecated way to create a new enum descriptor.
+ * See FileBuilderContext.add_enum for the recommended way.
  *
- * This is the recommended, idiomatic way to build enum definitions.
+ * Exists for backwards compatibility to allow building descriptor pool for
+ * files generated by protoc which don't add enums within "add_file" block.
+ * Enum descriptors created this way get assigned to a default empty
+ * FileDescriptor.
  */
 VALUE Builder_add_enum(VALUE _self, VALUE name) {
   DEFINE_SELF(Builder, self, _self);
-  VALUE enumdef = rb_class_new_instance(0, NULL, cEnumDescriptor);
+  VALUE enumdef =
+      rb_class_new_instance(1, &self->default_file_descriptor, cEnumDescriptor);
   VALUE ctx = rb_class_new_instance(1, &enumdef, cEnumBuilderContext);
   VALUE block = rb_block_proc();
   rb_funcall(enumdef, rb_intern("name="), 1, name);
@@ -1705,7 +2182,7 @@ VALUE Builder_add_enum(VALUE _self, VALUE name) {
   return Qnil;
 }
 
-static void validate_msgdef(const upb_msgdef* msgdef) {
+static void proto3_validate_msgdef(const upb_msgdef* msgdef) {
   // Verify that no required fields exist. proto3 does not support these.
   upb_msg_field_iter it;
   for (upb_msg_field_begin(&it, msgdef);
@@ -1718,7 +2195,7 @@ static void validate_msgdef(const upb_msgdef* msgdef) {
   }
 }
 
-static void validate_enumdef(const upb_enumdef* enumdef) {
+static void proto3_validate_enumdef(const upb_enumdef* enumdef) {
   // Verify that an entry exists with integer value 0. (This is the default
   // value.)
   const char* lookup = upb_enumdef_iton(enumdef, 0);
@@ -1753,10 +2230,16 @@ VALUE Builder_finalize_to_pool(VALUE _self, VALUE pool_rb) {
     VALUE def_rb = rb_ary_entry(self->pending_list, i);
     if (CLASS_OF(def_rb) == cDescriptor) {
       self->defs[i] = (upb_def*)ruby_to_Descriptor(def_rb)->msgdef;
-      validate_msgdef((const upb_msgdef*)self->defs[i]);
+      
+      if (upb_filedef_syntax(upb_def_file(self->defs[i])) == UPB_SYNTAX_PROTO3) {
+        proto3_validate_msgdef((const upb_msgdef*)self->defs[i]);
+      }
     } else if (CLASS_OF(def_rb) == cEnumDescriptor) {
       self->defs[i] = (upb_def*)ruby_to_EnumDescriptor(def_rb)->enumdef;
-      validate_enumdef((const upb_enumdef*)self->defs[i]);
+
+      if (upb_filedef_syntax(upb_def_file(self->defs[i])) == UPB_SYNTAX_PROTO3) {
+        proto3_validate_enumdef((const upb_enumdef*)self->defs[i]);
+      }
     }
   }
 

+ 82 - 33
ruby/ext/google/protobuf_c/encode_decode.c

@@ -100,24 +100,34 @@ void stringsink_uninit(stringsink *sink) {
 
 #define DEREF(msg, ofs, type) *(type*)(((uint8_t *)msg) + ofs)
 
-// Creates a handlerdata that simply contains the offset for this field.
-static const void* newhandlerdata(upb_handlers* h, uint32_t ofs) {
-  size_t* hd_ofs = ALLOC(size_t);
-  *hd_ofs = ofs;
-  upb_handlers_addcleanup(h, hd_ofs, xfree);
-  return hd_ofs;
+typedef struct {
+  size_t ofs;
+  int32_t hasbit;
+} field_handlerdata_t;
+
+// Creates a handlerdata that contains the offset and the hasbit for the field
+static const void* newhandlerdata(upb_handlers* h, uint32_t ofs, int32_t hasbit) {
+  field_handlerdata_t *hd = ALLOC(field_handlerdata_t);
+  hd->ofs = ofs;
+  hd->hasbit = hasbit;
+  upb_handlers_addcleanup(h, hd, xfree);
+  return hd;
 }
 
 typedef struct {
   size_t ofs;
+  int32_t hasbit;
   const upb_msgdef *md;
 } submsg_handlerdata_t;
 
 // Creates a handlerdata that contains offset and submessage type information.
-static const void *newsubmsghandlerdata(upb_handlers* h, uint32_t ofs,
+static const void *newsubmsghandlerdata(upb_handlers* h,
+                                        uint32_t ofs,
+                                        int32_t hasbit,
                                         const upb_fielddef* f) {
   submsg_handlerdata_t *hd = ALLOC(submsg_handlerdata_t);
   hd->ofs = ofs;
+  hd->hasbit = hasbit;
   hd->md = upb_fielddef_msgsubdef(f);
   upb_handlers_addcleanup(h, hd, xfree);
   return hd;
@@ -189,6 +199,13 @@ static void* appendstr_handler(void *closure,
   return (void*)str;
 }
 
+static void set_hasbit(void *closure, int32_t hasbit) {
+  if (hasbit > 0) {
+    uint8_t* storage = closure;
+    storage[hasbit/8] |= 1 << (hasbit % 8);
+  }
+}
+
 // Appends a 'bytes' string to a repeated field.
 static void* appendbytes_handler(void *closure,
                                  const void *hd,
@@ -205,10 +222,12 @@ static void* str_handler(void *closure,
                          const void *hd,
                          size_t size_hint) {
   MessageHeader* msg = closure;
-  const size_t *ofs = hd;
+  const field_handlerdata_t *fieldhandler = hd;
+
   VALUE str = rb_str_new2("");
   rb_enc_associate(str, kRubyStringUtf8Encoding);
-  DEREF(msg, *ofs, VALUE) = str;
+  DEREF(msg, fieldhandler->ofs, VALUE) = str;
+  set_hasbit(closure, fieldhandler->hasbit);
   return (void*)str;
 }
 
@@ -217,10 +236,12 @@ static void* bytes_handler(void *closure,
                            const void *hd,
                            size_t size_hint) {
   MessageHeader* msg = closure;
-  const size_t *ofs = hd;
+  const field_handlerdata_t *fieldhandler = hd;
+
   VALUE str = rb_str_new2("");
   rb_enc_associate(str, kRubyString8bitEncoding);
-  DEREF(msg, *ofs, VALUE) = str;
+  DEREF(msg, fieldhandler->ofs, VALUE) = str;
+  set_hasbit(closure, fieldhandler->hasbit);
   return (void*)str;
 }
 
@@ -280,8 +301,11 @@ static void *submsg_handler(void *closure, const void *hd) {
         rb_class_new_instance(0, NULL, subklass);
   }
 
+  set_hasbit(closure, submsgdata->hasbit);
+
   submsg_rb = DEREF(msg, submsgdata->ofs, VALUE);
   TypedData_Get_Struct(submsg_rb, MessageHeader, &Message_type, submsg);
+
   return submsg;
 }
 
@@ -500,7 +524,7 @@ static void add_handlers_for_repeated_field(upb_handlers *h,
                                             const upb_fielddef *f,
                                             size_t offset) {
   upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
-  upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset));
+  upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset, -1));
   upb_handlers_setstartseq(h, f, startseq_handler, &attr);
   upb_handlerattr_uninit(&attr);
 
@@ -534,7 +558,7 @@ static void add_handlers_for_repeated_field(upb_handlers *h,
     }
     case UPB_TYPE_MESSAGE: {
       upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
-      upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, 0, f));
+      upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, 0, -1, f));
       upb_handlers_setstartsubmsg(h, f, appendsubmsg_handler, &attr);
       upb_handlerattr_uninit(&attr);
       break;
@@ -545,7 +569,15 @@ static void add_handlers_for_repeated_field(upb_handlers *h,
 // Set up handlers for a singular field.
 static void add_handlers_for_singular_field(upb_handlers *h,
                                             const upb_fielddef *f,
-                                            size_t offset) {
+                                            size_t offset,
+                                            size_t hasbit_off) {
+  // The offset we pass to UPB points to the start of the Message,
+  // rather than the start of where our data is stored.
+  int32_t hasbit = -1;
+  if (hasbit_off != MESSAGE_FIELD_NO_HASBIT) {
+    hasbit = hasbit_off + sizeof(MessageHeader) * 8;
+  }
+
   switch (upb_fielddef_type(f)) {
     case UPB_TYPE_BOOL:
     case UPB_TYPE_INT32:
@@ -555,13 +587,13 @@ static void add_handlers_for_singular_field(upb_handlers *h,
     case UPB_TYPE_INT64:
     case UPB_TYPE_UINT64:
     case UPB_TYPE_DOUBLE:
-      upb_msg_setscalarhandler(h, f, offset, -1);
+      upb_msg_setscalarhandler(h, f, offset, hasbit);
       break;
     case UPB_TYPE_STRING:
     case UPB_TYPE_BYTES: {
       bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES;
       upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
-      upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset));
+      upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset, hasbit));
       upb_handlers_setstartstr(h, f,
                                is_bytes ? bytes_handler : str_handler,
                                &attr);
@@ -572,7 +604,9 @@ static void add_handlers_for_singular_field(upb_handlers *h,
     }
     case UPB_TYPE_MESSAGE: {
       upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
-      upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, offset, f));
+      upb_handlerattr_sethandlerdata(&attr,
+				     newsubmsghandlerdata(h, offset,
+							  hasbit, f));
       upb_handlers_setstartsubmsg(h, f, submsg_handler, &attr);
       upb_handlerattr_uninit(&attr);
       break;
@@ -610,10 +644,12 @@ static void add_handlers_for_mapentry(const upb_msgdef* msgdef,
 
   add_handlers_for_singular_field(
       h, key_field,
-      offsetof(map_parse_frame_t, key_storage));
+      offsetof(map_parse_frame_t, key_storage),
+      MESSAGE_FIELD_NO_HASBIT);
   add_handlers_for_singular_field(
       h, value_field,
-      offsetof(map_parse_frame_t, value_storage));
+      offsetof(map_parse_frame_t, value_storage),
+      MESSAGE_FIELD_NO_HASBIT);
 }
 
 // Set up handlers for a oneof field.
@@ -718,7 +754,8 @@ static void add_handlers_for_message(const void *closure, upb_handlers *h) {
     } else if (upb_fielddef_isseq(f)) {
       add_handlers_for_repeated_field(h, f, offset);
     } else {
-      add_handlers_for_singular_field(h, f, offset);
+      add_handlers_for_singular_field(
+          h, f, offset, desc->layout->fields[upb_fielddef_index(f)].hasbit);
     }
   }
 }
@@ -901,11 +938,6 @@ VALUE Message_decode_json(VALUE klass, VALUE data) {
 
 /* msgvisitor *****************************************************************/
 
-// TODO: If/when we support proto2 semantics in addition to the current proto3
-// semantics, which means that we have true field presence, we will want to
-// modify msgvisitor so that it emits all present fields rather than all
-// non-default-value fields.
-
 static void putmsg(VALUE msg, const Descriptor* desc,
                    upb_sink *sink, int depth, bool emit_defaults);
 
@@ -962,6 +994,7 @@ static void putary(VALUE ary, const upb_fielddef *f, upb_sink *sink,
   int size;
 
   if (ary == Qnil) return;
+  if (!emit_defaults && NUM2INT(RepeatedField_length(ary)) == 0) return;
 
   size = NUM2INT(RepeatedField_length(ary));
   if (size == 0 && !emit_defaults) return;
@@ -1062,6 +1095,8 @@ static void putmap(VALUE map, const upb_fielddef *f, upb_sink *sink,
   Map_iter it;
 
   if (map == Qnil) return;
+  if (!emit_defaults && Map_length(map) == 0) return;
+
   self = ruby_to_Map(map);
 
   upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &subsink);
@@ -1144,7 +1179,15 @@ static void putmsg(VALUE msg_rb, const Descriptor* desc,
       }
     } else if (upb_fielddef_isstring(f)) {
       VALUE str = DEREF(msg, offset, VALUE);
-      if (is_matching_oneof || emit_defaults || RSTRING_LEN(str) > 0) {
+      bool is_default = false;
+
+      if (upb_msgdef_syntax(desc->msgdef) == UPB_SYNTAX_PROTO2) {
+        is_default = layout_has(desc->layout, Message_data(msg), f) == Qfalse;
+      } else if (upb_msgdef_syntax(desc->msgdef) == UPB_SYNTAX_PROTO3) {
+        is_default = RSTRING_LEN(str) == 0;
+      }
+
+      if (is_matching_oneof || emit_defaults || !is_default) {
         putstr(str, f, sink);
       }
     } else if (upb_fielddef_issubmsg(f)) {
@@ -1152,13 +1195,19 @@ static void putmsg(VALUE msg_rb, const Descriptor* desc,
     } else {
       upb_selector_t sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
 
-#define T(upbtypeconst, upbtype, ctype, default_value)                    \
-  case upbtypeconst: {                                                    \
-      ctype value = DEREF(msg, offset, ctype);                            \
-      if (is_matching_oneof || emit_defaults || value != default_value) { \
-        upb_sink_put##upbtype(sink, sel, value);                          \
-      }                                                                   \
-    }                                                                     \
+#define T(upbtypeconst, upbtype, ctype, default_value)                          \
+  case upbtypeconst: {                                                          \
+      ctype value = DEREF(msg, offset, ctype);                                  \
+      bool is_default = false;                                                  \
+      if (upb_fielddef_haspresence(f)) {                                        \
+        is_default = layout_has(desc->layout, Message_data(msg), f) == Qfalse;  \
+      } else if (upb_msgdef_syntax(desc->msgdef) == UPB_SYNTAX_PROTO3) {        \
+        is_default = default_value == value;                                    \
+      }                                                                         \
+      if (is_matching_oneof || emit_defaults || !is_default) {                  \
+        upb_sink_put##upbtype(sink, sel, value);                                \
+      }                                                                         \
+    }                                                                           \
     break;
 
       switch (upb_fielddef_type(f)) {

+ 127 - 64
ruby/ext/google/protobuf_c/message.c

@@ -79,7 +79,7 @@ VALUE Message_alloc(VALUE klass) {
   return ret;
 }
 
-static VALUE which_oneof_field(MessageHeader* self, const upb_oneofdef* o) {
+static const upb_fielddef* which_oneof_field(MessageHeader* self, const upb_oneofdef* o) {
   upb_oneof_iter it;
   size_t case_ofs;
   uint32_t oneof_case;
@@ -88,7 +88,7 @@ static VALUE which_oneof_field(MessageHeader* self, const upb_oneofdef* o) {
 
   // If no fields in the oneof, always nil.
   if (upb_oneofdef_numfields(o) == 0) {
-    return Qnil;
+    return NULL;
   }
   // Grab the first field in the oneof so we can get its layout info to find the
   // oneof_case field.
@@ -103,22 +103,83 @@ static VALUE which_oneof_field(MessageHeader* self, const upb_oneofdef* o) {
   oneof_case = *((uint32_t*)((char*)Message_data(self) + case_ofs));
 
   if (oneof_case == ONEOF_CASE_NONE) {
-    return Qnil;
+    return NULL;
   }
 
   // oneof_case is a field index, so find that field.
   f = upb_oneofdef_itof(o, oneof_case);
   assert(f != NULL);
 
-  return ID2SYM(rb_intern(upb_fielddef_name(f)));
+  return f;
+}
+
+enum {
+  METHOD_UNKNOWN = 0,
+  METHOD_GETTER = 1,
+  METHOD_SETTER = 2,
+  METHOD_CLEAR = 3,
+  METHOD_PRESENCE = 4
+};
+
+/*
+ * Classifies a method_missing-style call by its name.
+ *
+ * Parameters:
+ *   method_name - Ruby Symbol naming the invoked method; Check_Type raises
+ *                 for any other type.
+ *   self        - message whose descriptor is searched for the field/oneof.
+ *   f, o        - outputs; reset to NULL on entry and, on success, set to the
+ *                 fielddef/oneofdef reported by upb_msgdef_lookupname.
+ *
+ * Returns one of the METHOD_* constants. METHOD_UNKNOWN is returned when the
+ * name (after stripping a '=' suffix, a "clear_" prefix, or a "has_" prefix
+ * plus '?' suffix) matches no field or oneof, or when a presence check is
+ * requested for a field without presence.
+ */
+static int extract_method_call(VALUE method_name, MessageHeader* self,
+                               const upb_fielddef **f, const upb_oneofdef **o) {
+  Check_Type(method_name, T_SYMBOL);
+
+  VALUE method_str = rb_id2str(SYM2ID(method_name));
+  char* name = RSTRING_PTR(method_str);
+  size_t name_len = RSTRING_LEN(method_str);
+  int accessor_type;
+  const upb_oneofdef* test_o;
+  const upb_fielddef* test_f;
+
+  // Give callers well-defined outputs on every return path.
+  *f = NULL;
+  *o = NULL;
+
+  // An empty symbol (:"") cannot name a field or oneof, and indexing
+  // name[name_len - 1] below would read before the start of the string.
+  if (name_len == 0) {
+    return METHOD_UNKNOWN;
+  }
+
+  // The "clear_" / "has_" prefixes are only stripped when the full name is
+  // not itself a field or oneof of this message, so that a proto member
+  // literally named clear_foo (or similar) keeps its plain getter.
+  if (name[name_len - 1] == '=') {
+    accessor_type = METHOD_SETTER;
+    name_len--;
+  } else if (strncmp("clear_", name, 6) == 0 &&
+             !upb_msgdef_lookupname(self->descriptor->msgdef, name, name_len,
+                                    &test_f, &test_o)) {
+    accessor_type = METHOD_CLEAR;
+    name = name + 6;
+    name_len = name_len - 6;
+  } else if (strncmp("has_", name, 4) == 0 && name[name_len - 1] == '?' &&
+             !upb_msgdef_lookupname(self->descriptor->msgdef, name, name_len,
+                                    &test_f, &test_o)) {
+    accessor_type = METHOD_PRESENCE;
+    // Drop the 4-character "has_" prefix and the trailing '?'.
+    name = name + 4;
+    name_len = name_len - 5;
+  } else {
+    accessor_type = METHOD_GETTER;
+  }
+
+  // Verify the name corresponds to a oneof or field in this message.
+  if (!upb_msgdef_lookupname(self->descriptor->msgdef, name, name_len,
+                             &test_f, &test_o)) {
+    return METHOD_UNKNOWN;
+  }
+
+  // Method calls like 'has_foo?' are not allowed if field "foo" does not have
+  // a hasbit (e.g. repeated fields or non-message type fields for proto3
+  // syntax).
+  if (accessor_type == METHOD_PRESENCE && test_f != NULL &&
+      !upb_fielddef_haspresence(test_f)) {
+    return METHOD_UNKNOWN;
+  }
+
+  *o = test_o;
+  *f = test_f;
+  return accessor_type;
 }
 
 /*
  * call-seq:
  *     Message.method_missing(*args)
  *
- * Provides accessors and setters for message fields according to their field
- * names. For any field whose name does not conflict with a built-in method, an
+ * Provides accessors and setters and methods to clear and check for presence of
+ * message fields according to their field names.
+ *
+ * For any field whose name does not conflict with a built-in method, an
  * accessor is provided with the same name as the field, and a setter is
  * provided with the name of the field plus the '=' suffix. Thus, given a
  * message instance 'msg' with field 'foo', the following code is valid:
@@ -129,13 +190,17 @@ static VALUE which_oneof_field(MessageHeader* self, const upb_oneofdef* o) {
  * This method also provides read-only accessors for oneofs. If a oneof exists
  * with name 'my_oneof', then msg.my_oneof will return a Ruby symbol equal to
  * the name of the field in that oneof that is currently set, or nil if none.
+ *
+ * It also provides methods of the form 'clear_fieldname' to clear the value
+ * of the field 'fieldname'. For basic data types, this will set the default
+ * value of the field.
+ *
+ * Additionally, it provides methods of the form 'has_fieldname?', which return
+ * true if the field 'fieldname' is set in the message object, else false. For
+ * 'proto3' syntax, calling this for a basic type field will result in an error.
  */
 VALUE Message_method_missing(int argc, VALUE* argv, VALUE _self) {
   MessageHeader* self;
-  VALUE method_name, method_str;
-  char* name;
-  size_t name_len;
-  bool setter;
   const upb_oneofdef* o;
   const upb_fielddef* f;
 
@@ -143,54 +208,54 @@ VALUE Message_method_missing(int argc, VALUE* argv, VALUE _self) {
   if (argc < 1) {
     rb_raise(rb_eArgError, "Expected method name as first argument.");
   }
-  method_name = argv[0];
-  if (!SYMBOL_P(method_name)) {
-    rb_raise(rb_eArgError, "Expected symbol as method name.");
-  }
-  method_str = rb_id2str(SYM2ID(method_name));
-  name = RSTRING_PTR(method_str);
-  name_len = RSTRING_LEN(method_str);
-  setter = false;
 
-  // Setters have names that end in '='.
-  if (name[name_len - 1] == '=') {
-    setter = true;
-    name_len--;
-  }
-
-  // See if this name corresponds to either a oneof or field in this message.
-  if (!upb_msgdef_lookupname(self->descriptor->msgdef, name, name_len, &f,
-                             &o)) {
+  int accessor_type = extract_method_call(argv[0], self, &f, &o);
+  if (accessor_type == METHOD_UNKNOWN || (o == NULL && f == NULL) ) {
     return rb_call_super(argc, argv);
+  } else if (accessor_type == METHOD_SETTER) {
+    if (argc != 2) {
+      rb_raise(rb_eArgError, "Expected 2 arguments, received %d", argc);
+    }
+  } else if (argc != 1) {
+    rb_raise(rb_eArgError, "Expected 1 argument, received %d", argc);
   }
 
+  // Return which of the oneof fields are set
   if (o != NULL) {
-    // This is a oneof -- return which field inside the oneof is set.
-    if (setter) {
+    if (accessor_type == METHOD_SETTER) {
       rb_raise(rb_eRuntimeError, "Oneof accessors are read-only.");
     }
-    return which_oneof_field(self, o);
-  } else {
-    // This is a field -- get or set the field's value.
-    assert(f);
-    if (setter) {
-      if (argc < 2) {
-        rb_raise(rb_eArgError, "No value provided to setter.");
+
+    const upb_fielddef* oneof_field = which_oneof_field(self, o);
+    if (accessor_type == METHOD_PRESENCE) {
+      return oneof_field == NULL ? Qfalse : Qtrue;
+    } else if (accessor_type == METHOD_CLEAR) {
+      if (oneof_field != NULL) {
+	layout_clear(self->descriptor->layout, Message_data(self), oneof_field);
       }
-      layout_set(self->descriptor->layout, Message_data(self), f, argv[1]);
       return Qnil;
     } else {
-      return layout_get(self->descriptor->layout, Message_data(self), f);
+      // METHOD_GETTER
+      return oneof_field == NULL ? Qnil :
+	ID2SYM(rb_intern(upb_fielddef_name(oneof_field)));
     }
+  // Otherwise we're operating on a single proto field
+  } else if (accessor_type == METHOD_SETTER) {
+    layout_set(self->descriptor->layout, Message_data(self), f, argv[1]);
+    return Qnil;
+  } else if (accessor_type == METHOD_CLEAR) {
+    layout_clear(self->descriptor->layout, Message_data(self), f);
+    return Qnil;
+  } else if (accessor_type == METHOD_PRESENCE) {
+    return layout_has(self->descriptor->layout, Message_data(self), f);
+  } else {
+    return layout_get(self->descriptor->layout, Message_data(self), f);
   }
 }
 
+
 VALUE Message_respond_to_missing(int argc, VALUE* argv, VALUE _self) {
   MessageHeader* self;
-  VALUE method_name, method_str;
-  char* name;
-  size_t name_len;
-  bool setter;
   const upb_oneofdef* o;
   const upb_fielddef* f;
 
@@ -198,30 +263,15 @@ VALUE Message_respond_to_missing(int argc, VALUE* argv, VALUE _self) {
   if (argc < 1) {
     rb_raise(rb_eArgError, "Expected method name as first argument.");
   }
-  method_name = argv[0];
-  if (!SYMBOL_P(method_name)) {
-    rb_raise(rb_eArgError, "Expected symbol as method name.");
-  }
-  method_str = rb_id2str(SYM2ID(method_name));
-  name = RSTRING_PTR(method_str);
-  name_len = RSTRING_LEN(method_str);
-  setter = false;
-
-  // Setters have names that end in '='.
-  if (name[name_len - 1] == '=') {
-    setter = true;
-    name_len--;
-  }
 
-  // See if this name corresponds to either a oneof or field in this message.
-  if (!upb_msgdef_lookupname(self->descriptor->msgdef, name, name_len, &f,
-                             &o)) {
+  int accessor_type = extract_method_call(argv[0], self, &f, &o);
+  if (accessor_type == METHOD_UNKNOWN) {
     return rb_call_super(argc, argv);
+  } else if (o != NULL) {
+    return accessor_type == METHOD_SETTER ? Qfalse : Qtrue;
+  } else {
+    return Qtrue;
   }
-  if (o != NULL) {
-    return setter ? Qfalse : Qtrue;
-  }
-  return Qtrue;
 }
 
 VALUE create_submsg_from_hash(const upb_fielddef *f, VALUE hash) {
@@ -444,13 +494,25 @@ VALUE Message_to_h(VALUE _self) {
        !upb_msg_field_done(&it);
        upb_msg_field_next(&it)) {
     const upb_fielddef* field = upb_msg_iter_field(&it);
+
+    // For proto2, do not include fields which are not set.
+    if (upb_msgdef_syntax(self->descriptor->msgdef) == UPB_SYNTAX_PROTO2 &&
+	field_contains_hasbit(self->descriptor->layout, field) &&
+	!layout_has(self->descriptor->layout, Message_data(self), field)) {
+      continue;
+    }
+
     VALUE msg_value = layout_get(self->descriptor->layout, Message_data(self),
                                  field);
     VALUE msg_key   = ID2SYM(rb_intern(upb_fielddef_name(field)));
-    if (upb_fielddef_ismap(field)) {
+    if (is_map_field(field)) {
       msg_value = Map_to_h(msg_value);
     } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
       msg_value = RepeatedField_to_ary(msg_value);
+      if (upb_msgdef_syntax(self->descriptor->msgdef) == UPB_SYNTAX_PROTO2 &&
+          RARRAY_LEN(msg_value) == 0) {
+        continue;
+      }
 
       if (upb_fielddef_type(field) == UPB_TYPE_MESSAGE) {
         for (int i = 0; i < RARRAY_LEN(msg_value); i++) {
@@ -458,6 +520,7 @@ VALUE Message_to_h(VALUE _self) {
           rb_ary_store(msg_value, i, Message_to_h(elem));
         }
       }
+
     } else if (msg_value != Qnil &&
                upb_fielddef_type(field) == UPB_TYPE_MESSAGE) {
       msg_value = Message_to_h(msg_value);

+ 2 - 0
ruby/ext/google/protobuf_c/protobuf.c

@@ -91,12 +91,14 @@ void Init_protobuf_c() {
   descriptor_instancevar_interned = rb_intern(kDescriptorInstanceVar);
   DescriptorPool_register(protobuf);
   Descriptor_register(protobuf);
+  FileDescriptor_register(protobuf);
   FieldDescriptor_register(protobuf);
   OneofDescriptor_register(protobuf);
   EnumDescriptor_register(protobuf);
   MessageBuilderContext_register(internal);
   OneofBuilderContext_register(internal);
   EnumBuilderContext_register(internal);
+  FileBuilderContext_register(internal);
   Builder_register(internal);
   RepeatedField_register(protobuf);
   Map_register(protobuf);

+ 58 - 1
ruby/ext/google/protobuf_c/protobuf.h

@@ -40,6 +40,7 @@
 // Forward decls.
 struct DescriptorPool;
 struct Descriptor;
+struct FileDescriptor;
 struct FieldDescriptor;
 struct EnumDescriptor;
 struct MessageLayout;
@@ -47,10 +48,12 @@ struct MessageField;
 struct MessageHeader;
 struct MessageBuilderContext;
 struct EnumBuilderContext;
+struct FileBuilderContext;
 struct Builder;
 
 typedef struct DescriptorPool DescriptorPool;
 typedef struct Descriptor Descriptor;
+typedef struct FileDescriptor FileDescriptor;
 typedef struct FieldDescriptor FieldDescriptor;
 typedef struct OneofDescriptor OneofDescriptor;
 typedef struct EnumDescriptor EnumDescriptor;
@@ -60,6 +63,7 @@ typedef struct MessageHeader MessageHeader;
 typedef struct MessageBuilderContext MessageBuilderContext;
 typedef struct OneofBuilderContext OneofBuilderContext;
 typedef struct EnumBuilderContext EnumBuilderContext;
+typedef struct FileBuilderContext FileBuilderContext;
 typedef struct Builder Builder;
 
 /*
@@ -118,6 +122,10 @@ struct Descriptor {
   const upb_handlers* json_serialize_handlers_preserve;
 };
 
+struct FileDescriptor {
+  const upb_filedef* filedef;
+};
+
 struct FieldDescriptor {
   const upb_fielddef* fielddef;
 };
@@ -145,18 +153,27 @@ struct EnumBuilderContext {
   VALUE enumdesc;
 };
 
+struct FileBuilderContext {
+  VALUE pending_list;
+  VALUE file_descriptor;
+  VALUE builder;
+};
+
 struct Builder {
   VALUE pending_list;
+  VALUE default_file_descriptor;
   upb_def** defs;  // used only while finalizing
 };
 
 extern VALUE cDescriptorPool;
 extern VALUE cDescriptor;
+extern VALUE cFileDescriptor;
 extern VALUE cFieldDescriptor;
 extern VALUE cEnumDescriptor;
 extern VALUE cMessageBuilderContext;
 extern VALUE cOneofBuilderContext;
 extern VALUE cEnumBuilderContext;
+extern VALUE cFileBuilderContext;
 extern VALUE cBuilder;
 
 extern VALUE cError;
@@ -175,7 +192,7 @@ VALUE DescriptorPool_alloc(VALUE klass);
 void DescriptorPool_register(VALUE module);
 DescriptorPool* ruby_to_DescriptorPool(VALUE value);
 VALUE DescriptorPool_add(VALUE _self, VALUE def);
-VALUE DescriptorPool_build(VALUE _self);
+VALUE DescriptorPool_build(int argc, VALUE* argv, VALUE _self);
 VALUE DescriptorPool_lookup(VALUE _self, VALUE name);
 VALUE DescriptorPool_generated_pool(VALUE _self);
 
@@ -184,6 +201,7 @@ void Descriptor_free(void* _self);
 VALUE Descriptor_alloc(VALUE klass);
 void Descriptor_register(VALUE module);
 Descriptor* ruby_to_Descriptor(VALUE value);
+VALUE Descriptor_initialize(VALUE _self, VALUE file_descriptor_rb);
 VALUE Descriptor_name(VALUE _self);
 VALUE Descriptor_name_set(VALUE _self, VALUE str);
 VALUE Descriptor_each(VALUE _self);
@@ -193,8 +211,19 @@ VALUE Descriptor_add_oneof(VALUE _self, VALUE obj);
 VALUE Descriptor_each_oneof(VALUE _self);
 VALUE Descriptor_lookup_oneof(VALUE _self, VALUE name);
 VALUE Descriptor_msgclass(VALUE _self);
+VALUE Descriptor_file_descriptor(VALUE _self);
 extern const rb_data_type_t _Descriptor_type;
 
+void FileDescriptor_mark(void* _self);
+void FileDescriptor_free(void* _self);
+VALUE FileDescriptor_alloc(VALUE klass);
+void FileDescriptor_register(VALUE module);
+FileDescriptor* ruby_to_FileDescriptor(VALUE value);
+VALUE FileDescriptor_initialize(int argc, VALUE* argv, VALUE _self);
+VALUE FileDescriptor_name(VALUE _self);
+VALUE FileDescriptor_syntax(VALUE _self);
+VALUE FileDescriptor_syntax_set(VALUE _self, VALUE syntax);
+
 void FieldDescriptor_mark(void* _self);
 void FieldDescriptor_free(void* _self);
 VALUE FieldDescriptor_alloc(VALUE klass);
@@ -204,6 +233,8 @@ VALUE FieldDescriptor_name(VALUE _self);
 VALUE FieldDescriptor_name_set(VALUE _self, VALUE str);
 VALUE FieldDescriptor_type(VALUE _self);
 VALUE FieldDescriptor_type_set(VALUE _self, VALUE type);
+VALUE FieldDescriptor_default(VALUE _self);
+VALUE FieldDescriptor_default_set(VALUE _self, VALUE default_value);
 VALUE FieldDescriptor_label(VALUE _self);
 VALUE FieldDescriptor_label_set(VALUE _self, VALUE label);
 VALUE FieldDescriptor_number(VALUE _self);
@@ -211,6 +242,8 @@ VALUE FieldDescriptor_number_set(VALUE _self, VALUE number);
 VALUE FieldDescriptor_submsg_name(VALUE _self);
 VALUE FieldDescriptor_submsg_name_set(VALUE _self, VALUE value);
 VALUE FieldDescriptor_subtype(VALUE _self);
+VALUE FieldDescriptor_has(VALUE _self, VALUE msg_rb);
+VALUE FieldDescriptor_clear(VALUE _self, VALUE msg_rb);
 VALUE FieldDescriptor_get(VALUE _self, VALUE msg_rb);
 VALUE FieldDescriptor_set(VALUE _self, VALUE msg_rb, VALUE value);
 upb_fieldtype_t ruby_to_fieldtype(VALUE type);
@@ -231,6 +264,8 @@ void EnumDescriptor_free(void* _self);
 VALUE EnumDescriptor_alloc(VALUE klass);
 void EnumDescriptor_register(VALUE module);
 EnumDescriptor* ruby_to_EnumDescriptor(VALUE value);
+VALUE EnumDescriptor_initialize(VALUE _self, VALUE file_descriptor_rb);
+VALUE EnumDescriptor_file_descriptor(VALUE _self);
 VALUE EnumDescriptor_name(VALUE _self);
 VALUE EnumDescriptor_name_set(VALUE _self, VALUE str);
 VALUE EnumDescriptor_add_value(VALUE _self, VALUE name, VALUE number);
@@ -272,12 +307,23 @@ EnumBuilderContext* ruby_to_EnumBuilderContext(VALUE value);
 VALUE EnumBuilderContext_initialize(VALUE _self, VALUE enumdesc);
 VALUE EnumBuilderContext_value(VALUE _self, VALUE name, VALUE number);
 
+void FileBuilderContext_mark(void* _self);
+void FileBuilderContext_free(void* _self);
+VALUE FileBuilderContext_alloc(VALUE klass);
+void FileBuilderContext_register(VALUE module);
+VALUE FileBuilderContext_initialize(VALUE _self, VALUE file_descriptor,
+				    VALUE builder);
+VALUE FileBuilderContext_add_message(VALUE _self, VALUE name);
+VALUE FileBuilderContext_add_enum(VALUE _self, VALUE name);
+VALUE FileBuilderContext_pending_descriptors(VALUE _self);
+
 void Builder_mark(void* _self);
 void Builder_free(void* _self);
 VALUE Builder_alloc(VALUE klass);
 void Builder_register(VALUE module);
 Builder* ruby_to_Builder(VALUE value);
 VALUE Builder_initialize(VALUE _self);
+VALUE Builder_add_file(int argc, VALUE *argv, VALUE _self);
 VALUE Builder_add_message(VALUE _self, VALUE name);
 VALUE Builder_add_enum(VALUE _self, VALUE name);
 VALUE Builder_finalize_to_pool(VALUE _self, VALUE pool_rb);
@@ -443,10 +489,12 @@ VALUE Map_iter_value(Map_iter* iter);
 // -----------------------------------------------------------------------------
 
 #define MESSAGE_FIELD_NO_CASE ((size_t)-1)
+#define MESSAGE_FIELD_NO_HASBIT ((size_t)-1)
 
 struct MessageField {
   size_t offset;
   size_t case_offset;  // for oneofs, a uint32. Else, MESSAGE_FIELD_NO_CASE.
+  size_t hasbit;       // presence-bit index, or MESSAGE_FIELD_NO_HASBIT.
 };
 
 struct MessageLayout {
@@ -457,6 +505,9 @@ struct MessageLayout {
 
 MessageLayout* create_layout(const upb_msgdef* msgdef);
 void free_layout(MessageLayout* layout);
+bool field_contains_hasbit(MessageLayout* layout,
+                 const upb_fielddef* field);
+VALUE layout_get_default(const upb_fielddef* field);
 VALUE layout_get(MessageLayout* layout,
                  const void* storage,
                  const upb_fielddef* field);
@@ -464,6 +515,12 @@ void layout_set(MessageLayout* layout,
                 void* storage,
                 const upb_fielddef* field,
                 VALUE val);
+VALUE layout_has(MessageLayout* layout,
+                 const void* storage,
+                 const upb_fielddef* field);
+void layout_clear(MessageLayout* layout,
+                 const void* storage,
+                 const upb_fielddef* field);
 void layout_init(MessageLayout* layout, void* storage);
 void layout_mark(MessageLayout* layout, void* storage);
 void layout_dup(MessageLayout* layout, void* to, void* from);

+ 194 - 87
ruby/ext/google/protobuf_c/storage.c

@@ -38,6 +38,8 @@
 // Ruby <-> native slot management.
 // -----------------------------------------------------------------------------
 
+// Byte address `ofs` bytes past `msg`. Both arguments are parenthesized so
+// that compound expressions (e.g. `base + i`) expand with the intended
+// precedence.
+#define CHARPTR_AT(msg, ofs) ((char*)(msg) + (ofs))
+// Lvalue of type `type` located `ofs` bytes past `msg`. The whole expansion is
+// parenthesized so it composes safely with surrounding operators.
+#define DEREF_OFFSET(msg, ofs, type) (*(type*)CHARPTR_AT(msg, ofs))
 #define DEREF(memory, type) *(type*)(memory)
 
 size_t native_slot_size(upb_fieldtype_t type) {
@@ -57,37 +59,6 @@ size_t native_slot_size(upb_fieldtype_t type) {
   }
 }
 
-static VALUE value_from_default(const upb_fielddef *field) {
-  switch (upb_fielddef_type(field)) {
-    case UPB_TYPE_FLOAT:   return DBL2NUM(upb_fielddef_defaultfloat(field));
-    case UPB_TYPE_DOUBLE:  return DBL2NUM(upb_fielddef_defaultdouble(field));
-    case UPB_TYPE_BOOL:
-      return upb_fielddef_defaultbool(field) ? Qtrue : Qfalse;
-    case UPB_TYPE_MESSAGE: return Qnil;
-    case UPB_TYPE_ENUM: {
-      const upb_enumdef *enumdef = upb_fielddef_enumsubdef(field);
-      int32_t num = upb_fielddef_defaultint32(field);
-      const char *label = upb_enumdef_iton(enumdef, num);
-      if (label) {
-        return ID2SYM(rb_intern(label));
-      } else {
-        return INT2NUM(num);
-      }
-    }
-    case UPB_TYPE_INT32:   return INT2NUM(upb_fielddef_defaultint32(field));
-    case UPB_TYPE_INT64:   return LL2NUM(upb_fielddef_defaultint64(field));;
-    case UPB_TYPE_UINT32:  return UINT2NUM(upb_fielddef_defaultuint32(field));
-    case UPB_TYPE_UINT64:  return ULL2NUM(upb_fielddef_defaultuint64(field));
-    case UPB_TYPE_STRING:
-    case UPB_TYPE_BYTES: {
-      size_t size;
-      const char *str = upb_fielddef_defaultstr(field, &size);
-      return rb_str_new(str, size);
-    }
-    default: return Qnil;
-  }
-}
-
 static bool is_ruby_num(VALUE value) {
   return (TYPE(value) == T_FLOAT ||
           TYPE(value) == T_FIXNUM ||
@@ -404,7 +375,12 @@ const upb_msgdef *map_entry_msgdef(const upb_fielddef* field) {
 }
 
 bool is_map_field(const upb_fielddef *field) {
-  return tryget_map_entry_msgdef(field) != NULL;
+  const upb_msgdef* subdef = tryget_map_entry_msgdef(field);
+  if (subdef == NULL) return false;
+
+  // Map fields are a proto3 feature.
+  // If we're using proto2 syntax we need to fall back to the repeated field.
+  return upb_msgdef_syntax(subdef) == UPB_SYNTAX_PROTO3;
 }
 
 const upb_fielddef* map_field_key(const upb_fielddef* field) {
@@ -433,6 +409,12 @@ const upb_fielddef* map_entry_value(const upb_msgdef* msgdef) {
 // Memory layout management.
 // -----------------------------------------------------------------------------
 
+// Returns true when `field` has a presence bit assigned in `layout`, i.e. its
+// recorded hasbit is anything other than MESSAGE_FIELD_NO_HASBIT.
+bool field_contains_hasbit(MessageLayout* layout,
+                           const upb_fielddef* field) {
+  const MessageField* entry = &layout->fields[upb_fielddef_index(field)];
+  return entry->hasbit != MESSAGE_FIELD_NO_HASBIT;
+}
+
 static size_t align_up_to(size_t offset, size_t granularity) {
   // Granularity must be a power of two.
   return (offset + granularity - 1) & ~(granularity - 1);
@@ -447,6 +429,23 @@ MessageLayout* create_layout(const upb_msgdef* msgdef) {
 
   layout->fields = ALLOC_N(MessageField, nfields);
 
+  size_t hasbit = 0;
+  for (upb_msg_field_begin(&it, msgdef);
+       !upb_msg_field_done(&it);
+       upb_msg_field_next(&it)) {
+    const upb_fielddef* field = upb_msg_iter_field(&it);
+    if (upb_fielddef_haspresence(field)) {
+      layout->fields[upb_fielddef_index(field)].hasbit = hasbit++;
+    } else {
+      layout->fields[upb_fielddef_index(field)].hasbit =
+	  MESSAGE_FIELD_NO_HASBIT;
+    }
+  }
+
+  if (hasbit != 0) {
+    off += (hasbit + 8 - 1) / 8;
+  }
+
   for (upb_msg_field_begin(&it, msgdef);
        !upb_msg_field_done(&it);
        upb_msg_field_next(&it)) {
@@ -569,6 +568,136 @@ static uint32_t* slot_oneof_case(MessageLayout* layout,
       layout->fields[upb_fielddef_index(field)].case_offset);
 }
 
+// Marks `field` as present by setting its presence bit in `storage`.
+// The field must have a hasbit assigned (asserted).
+static void slot_set_hasbit(MessageLayout* layout,
+                            const void* storage,
+                            const upb_fielddef* field) {
+  const size_t bit = layout->fields[upb_fielddef_index(field)].hasbit;
+  // Presence bits are addressed as a byte array starting at `storage`.
+  uint8_t* presence = (uint8_t*)storage;
+
+  assert(bit != MESSAGE_FIELD_NO_HASBIT);
+  presence[bit / 8] |= 1 << (bit % 8);
+}
+
+// Marks `field` as absent by clearing its presence bit in `storage`.
+// The field must have a hasbit assigned (asserted).
+static void slot_clear_hasbit(MessageLayout* layout,
+                              const void* storage,
+                              const upb_fielddef* field) {
+  const size_t bit = layout->fields[upb_fielddef_index(field)].hasbit;
+  // Presence bits are addressed as a byte array starting at `storage`.
+  uint8_t* presence = (uint8_t*)storage;
+
+  assert(bit != MESSAGE_FIELD_NO_HASBIT);
+  presence[bit / 8] &= ~(1 << (bit % 8));
+}
+
+// Reports whether the presence bit of `field` is set in `storage`.
+// Fields without a hasbit always report false.
+static bool slot_is_hasbit_set(MessageLayout* layout,
+                               const void* storage,
+                               const upb_fielddef* field) {
+  const size_t bit = layout->fields[upb_fielddef_index(field)].hasbit;
+
+  if (bit != MESSAGE_FIELD_NO_HASBIT) {
+    // Fetch the byte that holds this field's bit, then test the bit.
+    const char presence_byte = DEREF_OFFSET((uint8_t*)storage, bit / 8, char);
+    return presence_byte & (1 << (bit % 8));
+  }
+  return false;
+}
+
+// Presence query returning a Ruby boolean: Qtrue when the presence bit of
+// `field` is set in `storage`, Qfalse otherwise. Only valid for fields that
+// carry a hasbit (asserted).
+VALUE layout_has(MessageLayout* layout,
+                 const void* storage,
+                 const upb_fielddef* field) {
+  assert(field_contains_hasbit(layout, field));
+  if (slot_is_hasbit_set(layout, storage, field)) {
+    return Qtrue;
+  }
+  return Qfalse;
+}
+
+// Resets `field` in `storage` to its unset state: drops the presence bit (if
+// the field has one), then
+//   - oneof member:    zeroes the slot and sets the case to ONEOF_CASE_NONE;
+//   - map field:       installs a fresh, empty Map;
+//   - repeated field:  installs a fresh, empty RepeatedField;
+//   - anything else:   stores the field's declared default value.
+void layout_clear(MessageLayout* layout,
+                 const void* storage,
+                 const upb_fielddef* field) {
+  void* memory = slot_memory(layout, storage, field);
+  uint32_t* oneof_case = slot_oneof_case(layout, storage, field);
+
+  if (field_contains_hasbit(layout, field)) {
+    slot_clear_hasbit(layout, storage, field);
+  }
+
+  if (upb_fielddef_containingoneof(field)) {
+    memset(memory, 0, NATIVE_SLOT_MAX_SIZE);
+    *oneof_case = ONEOF_CASE_NONE;
+    return;
+  }
+
+  if (is_map_field(field)) {
+    const upb_fielddef* key_field = map_field_key(field);
+    const upb_fielddef* value_field = map_field_value(field);
+    VALUE value_class = field_type_class(value_field);
+    // Map.new takes (key type, value type[, value class]); the class is
+    // appended only when the value type has one.
+    VALUE args[3] = { Qnil, Qnil, Qnil };
+    int argc = 2;
+
+    args[0] = fieldtype_to_ruby(upb_fielddef_type(key_field));
+    args[1] = fieldtype_to_ruby(upb_fielddef_type(value_field));
+    if (value_class != Qnil) {
+      args[argc++] = value_class;
+    }
+
+    DEREF(memory, VALUE) = rb_class_new_instance(argc, args, cMap);
+    return;
+  }
+
+  if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
+    VALUE element_class = field_type_class(field);
+    // RepeatedField.new takes (element type[, element class]).
+    VALUE args[2] = { Qnil, Qnil };
+    int argc = 1;
+
+    args[0] = fieldtype_to_ruby(upb_fielddef_type(field));
+    if (element_class != Qnil) {
+      args[argc++] = element_class;
+    }
+
+    DEREF(memory, VALUE) = rb_class_new_instance(argc, args, cRepeatedField);
+    return;
+  }
+
+  // Singular field outside a oneof: write the declared default into the slot.
+  native_slot_set(upb_fielddef_type(field), field_type_class(field),
+                  memory, layout_get_default(field));
+}
+
+// Returns the declared default of `field` as a Ruby value.
+//   - float/double:        Float
+//   - bool:                true / false
+//   - int32/64, uint32/64: Integer
+//   - enum:                the Symbol naming the default number, or the bare
+//                          Integer when the enum has no name for that number
+//   - string/bytes:        a frozen String, tagged with the UTF-8 encoding for
+//                          strings and kRubyString8bitEncoding for bytes
+//   - message / other:     nil
+VALUE layout_get_default(const upb_fielddef *field) {
+  switch (upb_fielddef_type(field)) {
+    case UPB_TYPE_FLOAT:   return DBL2NUM(upb_fielddef_defaultfloat(field));
+    case UPB_TYPE_DOUBLE:  return DBL2NUM(upb_fielddef_defaultdouble(field));
+    case UPB_TYPE_BOOL:
+      return upb_fielddef_defaultbool(field) ? Qtrue : Qfalse;
+    case UPB_TYPE_MESSAGE: return Qnil;
+    case UPB_TYPE_ENUM: {
+      const upb_enumdef *enumdef = upb_fielddef_enumsubdef(field);
+      int32_t num = upb_fielddef_defaultint32(field);
+      const char *label = upb_enumdef_iton(enumdef, num);
+      if (label) {
+        return ID2SYM(rb_intern(label));
+      } else {
+        return INT2NUM(num);
+      }
+    }
+    case UPB_TYPE_INT32:   return INT2NUM(upb_fielddef_defaultint32(field));
+    case UPB_TYPE_INT64:   return LL2NUM(upb_fielddef_defaultint64(field));
+    case UPB_TYPE_UINT32:  return UINT2NUM(upb_fielddef_defaultuint32(field));
+    case UPB_TYPE_UINT64:  return ULL2NUM(upb_fielddef_defaultuint64(field));
+    case UPB_TYPE_STRING:
+    case UPB_TYPE_BYTES: {
+      size_t size;
+      const char *str = upb_fielddef_defaultstr(field, &size);
+      VALUE str_rb = rb_str_new(str, size);
+
+      rb_enc_associate(str_rb, (upb_fielddef_type(field) == UPB_TYPE_BYTES) ?
+                 kRubyString8bitEncoding : kRubyStringUtf8Encoding);
+      // Frozen before it is handed out, so callers cannot modify it in place.
+      rb_obj_freeze(str_rb);
+      return str_rb;
+    }
+    default: return Qnil;
+  }
+}
 
 VALUE layout_get(MessageLayout* layout,
                  const void* storage,
@@ -576,15 +705,24 @@ VALUE layout_get(MessageLayout* layout,
   void* memory = slot_memory(layout, storage, field);
   uint32_t* oneof_case = slot_oneof_case(layout, storage, field);
 
+  bool field_set;
+  if (field_contains_hasbit(layout, field)) {
+    field_set = slot_is_hasbit_set(layout, storage, field);
+  } else {
+    field_set = true;
+  }
+
   if (upb_fielddef_containingoneof(field)) {
     if (*oneof_case != upb_fielddef_number(field)) {
-      return value_from_default(field);
+      return layout_get_default(field);
     }
     return native_slot_get(upb_fielddef_type(field),
                            field_type_class(field),
                            memory);
   } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
     return *((VALUE *)memory);
+  } else if (!field_set) {
+    return layout_get_default(field);
   } else {
     return native_slot_get(upb_fielddef_type(field),
                            field_type_class(field),
@@ -689,67 +827,24 @@ void layout_set(MessageLayout* layout,
     check_repeated_field_type(val, field);
     DEREF(memory, VALUE) = val;
   } else {
-    native_slot_set(upb_fielddef_type(field), field_type_class(field),
-                    memory, val);
+    native_slot_set(upb_fielddef_type(field), field_type_class(field), memory,
+		    val);
+  }
+
+  if (layout->fields[upb_fielddef_index(field)].hasbit !=
+      MESSAGE_FIELD_NO_HASBIT) {
+    slot_set_hasbit(layout, storage, field);
   }
 }
 
 void layout_init(MessageLayout* layout,
                  void* storage) {
+
   upb_msg_field_iter it;
   for (upb_msg_field_begin(&it, layout->msgdef);
        !upb_msg_field_done(&it);
        upb_msg_field_next(&it)) {
-    const upb_fielddef* field = upb_msg_iter_field(&it);
-    void* memory = slot_memory(layout, storage, field);
-    uint32_t* oneof_case = slot_oneof_case(layout, storage, field);
-
-    if (upb_fielddef_containingoneof(field)) {
-      memset(memory, 0, NATIVE_SLOT_MAX_SIZE);
-      *oneof_case = ONEOF_CASE_NONE;
-    } else if (is_map_field(field)) {
-      VALUE map = Qnil;
-
-      const upb_fielddef* key_field = map_field_key(field);
-      const upb_fielddef* value_field = map_field_value(field);
-      VALUE type_class = field_type_class(value_field);
-
-      if (type_class != Qnil) {
-        VALUE args[3] = {
-          fieldtype_to_ruby(upb_fielddef_type(key_field)),
-          fieldtype_to_ruby(upb_fielddef_type(value_field)),
-          type_class,
-        };
-        map = rb_class_new_instance(3, args, cMap);
-      } else {
-        VALUE args[2] = {
-          fieldtype_to_ruby(upb_fielddef_type(key_field)),
-          fieldtype_to_ruby(upb_fielddef_type(value_field)),
-        };
-        map = rb_class_new_instance(2, args, cMap);
-      }
-
-      DEREF(memory, VALUE) = map;
-    } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
-      VALUE ary = Qnil;
-
-      VALUE type_class = field_type_class(field);
-
-      if (type_class != Qnil) {
-        VALUE args[2] = {
-          fieldtype_to_ruby(upb_fielddef_type(field)),
-          type_class,
-        };
-        ary = rb_class_new_instance(2, args, cRepeatedField);
-      } else {
-        VALUE args[1] = { fieldtype_to_ruby(upb_fielddef_type(field)) };
-        ary = rb_class_new_instance(1, args, cRepeatedField);
-      }
-
-      DEREF(memory, VALUE) = ary;
-    } else {
-      native_slot_init(upb_fielddef_type(field), memory);
-    }
+    layout_clear(layout, storage, upb_msg_iter_field(&it));
   }
 }
 
@@ -796,6 +891,11 @@ void layout_dup(MessageLayout* layout, void* to, void* from) {
     } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
       DEREF(to_memory, VALUE) = RepeatedField_dup(DEREF(from_memory, VALUE));
     } else {
+      if (field_contains_hasbit(layout, field)) {
+        if (!slot_is_hasbit_set(layout, from, field)) continue;
+        slot_set_hasbit(layout, to, field);
+      }
+
       native_slot_dup(upb_fielddef_type(field), to_memory, from_memory);
     }
   }
@@ -825,6 +925,11 @@ void layout_deep_copy(MessageLayout* layout, void* to, void* from) {
       DEREF(to_memory, VALUE) =
           RepeatedField_deep_copy(DEREF(from_memory, VALUE));
     } else {
+      if (field_contains_hasbit(layout, field)) {
+        if (!slot_is_hasbit_set(layout, from, field)) continue;
+        slot_set_hasbit(layout, to, field);
+      }
+
       native_slot_deep_copy(upb_fielddef_type(field), to_memory, from_memory);
     }
   }
@@ -861,8 +966,10 @@ VALUE layout_eq(MessageLayout* layout, void* msg1, void* msg2) {
         return Qfalse;
       }
     } else {
-      if (!native_slot_eq(upb_fielddef_type(field),
-                          msg1_memory, msg2_memory)) {
+      if (slot_is_hasbit_set(layout, msg1, field) !=
+	  slot_is_hasbit_set(layout, msg2, field) ||
+          !native_slot_eq(upb_fielddef_type(field),
+			  msg1_memory, msg2_memory)) {
         return Qfalse;
       }
     }

File diff suppressed because it is too large
+ 108 - 843
ruby/tests/basic.rb


+ 265 - 0
ruby/tests/basic_proto2.rb

@@ -0,0 +1,265 @@
+#!/usr/bin/ruby
+
+# basic_test_proto2_pb.rb is in the same directory as this test.
+$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__)))
+
+require 'basic_test_proto2_pb'
+require 'common_tests'
+require 'google/protobuf'
+require 'json'
+require 'test/unit'
+
+# ------------- generated code --------------
+
+module BasicTestProto2
+  # Hand-written proto2 message registered in its own DescriptorPool (a new
+  # pool instance, separate from the generated pool).
+  pool = Google::Protobuf::DescriptorPool.new
+  pool.build do
+    add_file "test_proto2.proto", syntax: :proto2 do
+      # Field names that collide with Object methods (dup, class) or that are
+      # not valid Ruby method names (a.b).
+      add_message "BadFieldNames" do
+        optional :dup, :int32, 1
+        optional :class, :int32, 2
+        optional :"a.b", :int32, 3
+      end
+    end
+  end
+
+  # Message class built from the descriptor registered above.
+  BadFieldNames = pool.lookup("BadFieldNames").msgclass
+
+# ------------ test cases ---------------
+
+  class MessageContainerTest < Test::Unit::TestCase
+    # Required by CommonTests: returns the module that holds the proto2 message
+    # classes, so the shared tests can resolve them (e.g.
+    # proto_module::TestMessage).
+    def proto_module
+      ::BasicTestProto2
+    end
+    include CommonTests
+
+    def test_has_field
+      m = TestMessage.new
+      assert_false m.has_optional_int32?
+      assert_false TestMessage.descriptor.lookup('optional_int32').has?(m)
+      assert_false m.has_optional_int64?
+      assert_false TestMessage.descriptor.lookup('optional_int64').has?(m)
+      assert_false m.has_optional_uint32?
+      assert_false TestMessage.descriptor.lookup('optional_uint32').has?(m)
+      assert_false m.has_optional_uint64?
+      assert_false TestMessage.descriptor.lookup('optional_uint64').has?(m)
+      assert_false m.has_optional_bool?
+      assert_false TestMessage.descriptor.lookup('optional_bool').has?(m)
+      assert_false m.has_optional_float?
+      assert_false TestMessage.descriptor.lookup('optional_float').has?(m)
+      assert_false m.has_optional_double?
+      assert_false TestMessage.descriptor.lookup('optional_double').has?(m)
+      assert_false m.has_optional_string?
+      assert_false TestMessage.descriptor.lookup('optional_string').has?(m)
+      assert_false m.has_optional_bytes?
+      assert_false TestMessage.descriptor.lookup('optional_bytes').has?(m)
+      assert_false m.has_optional_enum?
+      assert_false TestMessage.descriptor.lookup('optional_enum').has?(m)
+
+      m = TestMessage.new(:optional_int32 => nil)
+      assert_false m.has_optional_int32?
+
+      assert_raise NoMethodError do
+        m.has_repeated_msg?
+      end
+      assert_raise ArgumentError do
+        TestMessage.descriptor.lookup('repeated_msg').has?(m)
+      end
+
+      m.optional_msg = TestMessage2.new
+      assert_true m.has_optional_msg?
+      assert_true TestMessage.descriptor.lookup('optional_msg').has?(m)
+
+      m = OneofMessage.new
+      assert_false m.has_my_oneof?
+      m.a = "foo"
+      assert_true m.has_a?
+      assert_true OneofMessage.descriptor.lookup('a').has?(m)
+      assert_equal "foo", m.a
+      assert_true m.has_my_oneof?
+      assert_false m.has_b?
+      assert_false OneofMessage.descriptor.lookup('b').has?(m)
+      assert_false m.has_c?
+      assert_false OneofMessage.descriptor.lookup('c').has?(m)
+      assert_false m.has_d?
+      assert_false OneofMessage.descriptor.lookup('d').has?(m)
+
+      m = OneofMessage.new
+      m.b = 100
+      assert_true m.has_b?
+      assert_equal 100, m.b
+      assert_true m.has_my_oneof?
+      assert_false m.has_a?
+      assert_false m.has_c?
+      assert_false m.has_d?
+
+      m = OneofMessage.new
+      m.c = TestMessage2.new
+      assert_true m.has_c?
+      assert_equal TestMessage2.new, m.c
+      assert_true m.has_my_oneof?
+      assert_false m.has_a?
+      assert_false m.has_b?
+      assert_false m.has_d?
+
+      m = OneofMessage.new
+      m.d = :A
+      assert_true m.has_d?
+      assert_equal :A, m.d
+      assert_true m.has_my_oneof?
+      assert_false m.has_a?
+      assert_false m.has_b?
+      assert_false m.has_c?
+    end
+
+    def test_defined_defaults
+      m = TestMessageDefaults.new
+      assert_equal 1, m.optional_int32
+      assert_equal 2, m.optional_int64
+      assert_equal 3, m.optional_uint32
+      assert_equal 4, m.optional_uint64
+      assert_equal true, m.optional_bool
+      assert_equal 6.0, m.optional_float
+      assert_equal 7.0, m.optional_double
+      assert_equal "Default Str", m.optional_string
+      assert_equal "\xCF\xA5s\xBD\xBA\xE6fubar".force_encoding("ASCII-8BIT"), m.optional_bytes
+      assert_equal :B2, m.optional_enum
+
+      assert_false m.has_optional_int32?
+      assert_false m.has_optional_int64?
+      assert_false m.has_optional_uint32?
+      assert_false m.has_optional_uint64?
+      assert_false m.has_optional_bool?
+      assert_false m.has_optional_float?
+      assert_false m.has_optional_double?
+      assert_false m.has_optional_string?
+      assert_false m.has_optional_bytes?
+      assert_false m.has_optional_enum?
+    end
+
+    def test_set_clear_defaults
+      m = TestMessageDefaults.new
+
+      m.optional_int32 = -42
+      assert_equal -42, m.optional_int32
+      assert_true m.has_optional_int32?
+      m.clear_optional_int32
+      assert_equal 1, m.optional_int32
+      assert_false m.has_optional_int32?
+
+      m.optional_string = "foo bar"
+      assert_equal "foo bar", m.optional_string
+      assert_true m.has_optional_string?
+      m.clear_optional_string
+      assert_equal "Default Str", m.optional_string
+      assert_false m.has_optional_string?
+
+      m.optional_msg = TestMessage2.new(:foo => 42)
+      assert_equal TestMessage2.new(:foo => 42), m.optional_msg
+      assert_true m.has_optional_msg?
+
+      m.clear_optional_msg
+      assert_equal nil, m.optional_msg
+      assert_false m.has_optional_msg?
+
+      m.optional_msg = TestMessage2.new(:foo => 42)
+      assert_equal TestMessage2.new(:foo => 42), m.optional_msg
+      assert_true TestMessageDefaults.descriptor.lookup('optional_msg').has?(m)
+
+      TestMessageDefaults.descriptor.lookup('optional_msg').clear(m)
+      assert_equal nil, m.optional_msg
+      assert_false TestMessageDefaults.descriptor.lookup('optional_msg').has?(m)
+
+      m = TestMessage.new
+      m.repeated_int32.push(1)
+      assert_equal [1], m.repeated_int32
+      m.clear_repeated_int32
+      assert_equal [], m.repeated_int32
+
+      m = OneofMessage.new
+      m.a = "foo"
+      assert_equal "foo", m.a
+      assert_true m.has_a?
+      m.clear_a
+      assert_false m.has_a?
+
+      m = OneofMessage.new
+      m.a = "foobar"
+      assert_true m.has_my_oneof?
+      m.clear_my_oneof
+      assert_false m.has_my_oneof?
+
+      m = OneofMessage.new
+      m.a = "bar"
+      assert_equal "bar", m.a
+      assert_true m.has_my_oneof?
+      OneofMessage.descriptor.lookup('a').clear(m)
+      assert_false m.has_my_oneof?
+    end
+
+    def test_initialization_map_errors
+      e = assert_raise ArgumentError do
+        TestMessage.new(:hello => "world")
+      end
+      assert_match(/hello/, e.message)
+
+      e = assert_raise ArgumentError do
+        TestMessage.new(:repeated_uint32 => "hello")
+      end
+      assert_equal e.message, "Expected array as initializer value for repeated field 'repeated_uint32'."
+    end
+
+
+    def test_to_h
+      m = TestMessage.new(:optional_bool => true, :optional_double => -10.100001, :optional_string => 'foo', :repeated_string => ['bar1', 'bar2'])
+      expected_result = {
+        :optional_bool=>true,
+        :optional_double=>-10.100001,
+        :optional_string=>"foo",
+        :repeated_string=>["bar1", "bar2"],
+      }
+      assert_equal expected_result, m.to_h
+
+      m = OneofMessage.new(:a => "foo")
+      expected_result = {:a => "foo"}
+      assert_equal expected_result, m.to_h
+    end
+
+    def test_map_keyword_disabled
+      pool = Google::Protobuf::DescriptorPool.new
+
+      e = assert_raise ArgumentError do
+        pool.build do
+          add_file 'test_file.proto', syntax: :proto2 do
+            add_message "MapMessage" do
+              map :map_string_int32, :string, :int32, 1
+              map :map_string_msg, :string, :message, 2, "TestMessage2"
+            end
+          end
+        end
+      end
+
+      assert_match(/Cannot add a native map/, e.message)
+    end
+
+    def test_respond_to
+      # This test fails with JRuby 1.7.23, likely because of an old JRuby bug.
+      return if RUBY_PLATFORM == "java"
+      msg = TestMessage.new
+      assert !msg.respond_to?(:bacon)
+    end
+
+    def test_file_descriptor
+      file_descriptor = TestMessage.descriptor.file_descriptor
+      assert_true nil != file_descriptor
+      assert_equal "tests/basic_test_proto2.proto", file_descriptor.name
+      assert_equal :proto2, file_descriptor.syntax
+
+      file_descriptor = TestEnum.descriptor.file_descriptor
+      assert_true nil != file_descriptor
+      assert_equal "tests/basic_test_proto2.proto", file_descriptor.name
+      assert_equal :proto2, file_descriptor.syntax
+    end
+  end
+end

+ 109 - 0
ruby/tests/basic_test.proto

@@ -0,0 +1,109 @@
+syntax = "proto3";
+
+package basic_test;
+
+message Foo {
+  Bar bar = 1;
+  repeated Baz baz = 2;
+}
+
+message Bar {
+  string msg = 1;
+}
+
+message Baz {
+  string msg = 1;
+}
+
+message TestMessage {
+  int32 optional_int32 = 1;
+  int64 optional_int64 = 2;
+  uint32 optional_uint32 = 3;
+  uint64 optional_uint64 = 4;
+  bool optional_bool = 5;
+  float optional_float = 6;
+  double optional_double = 7;
+  string optional_string = 8;
+  bytes optional_bytes = 9;
+  TestMessage2 optional_msg = 10;
+  TestEnum optional_enum = 11;
+
+  repeated int32 repeated_int32 = 12;
+  repeated int64 repeated_int64 = 13;
+  repeated uint32 repeated_uint32 = 14;
+  repeated uint64 repeated_uint64 = 15;
+  repeated bool repeated_bool = 16;
+  repeated float repeated_float = 17;
+  repeated double repeated_double = 18;
+  repeated string repeated_string = 19;
+  repeated bytes repeated_bytes = 20;
+  repeated TestMessage2 repeated_msg = 21;
+  repeated TestEnum repeated_enum = 22;
+}
+
+message TestMessage2 {
+  int32 foo = 1;
+}
+
+enum TestEnum {
+  Default = 0;
+  A = 1;
+  B = 2;
+  C = 3;
+}
+
+message TestEmbeddedMessageParent {
+  TestEmbeddedMessageChild child_msg = 1;
+  int32 number = 2;
+
+  repeated TestEmbeddedMessageChild repeated_msg = 3;
+  repeated int32 repeated_number = 4;
+}
+
+message TestEmbeddedMessageChild {
+  TestMessage sub_child = 1;
+}
+
+message Recursive1 {
+  Recursive2 foo = 1;
+}
+
+message Recursive2 {
+  Recursive1 foo = 1;
+}
+
+message MapMessage {
+  map<string, int32> map_string_int32 = 1;
+  map<string, TestMessage2> map_string_msg = 2;
+}
+
+// Same field names and numbers as MapMessage, but declared as repeated
+// key/value entry messages instead of map<> fields.
+// NOTE(review): presumably used to check that both forms are wire-compatible;
+// confirm against the tests.
+message MapMessageWireEquiv {
+  repeated MapMessageWireEquiv_entry1 map_string_int32 = 1;
+  repeated MapMessageWireEquiv_entry2 map_string_msg = 2;
+}
+
+message MapMessageWireEquiv_entry1 {
+  string key = 1;
+  int32 value = 2;
+}
+
+message MapMessageWireEquiv_entry2 {
+  string key = 1;
+  TestMessage2 value = 2;
+}
+
+message OneofMessage {
+  oneof my_oneof {
+    string a = 1;
+    int32 b = 2;
+    TestMessage2 c = 3;
+    TestEnum d = 4;
+  }
+}
+
+message Outer {
+  map<int32, Inner> items = 1;
+}
+
+message Inner {
+}

+ 117 - 0
ruby/tests/basic_test_proto2.proto

@@ -0,0 +1,117 @@
+syntax = "proto2";
+
+package basic_test_proto2;
+
+message Foo {
+  optional Bar bar = 1;
+  repeated Baz baz = 2;
+}
+
+message Bar {
+  optional string msg = 1;
+}
+
+message Baz {
+  optional string msg = 1;
+}
+
+message TestMessage {
+  optional int32 optional_int32 = 1;
+  optional int64 optional_int64 = 2;
+  optional uint32 optional_uint32 = 3;
+  optional uint64 optional_uint64 = 4;
+  optional bool optional_bool = 5;
+  optional float optional_float = 6;
+  optional double optional_double = 7;
+  optional string optional_string = 8;
+  optional bytes optional_bytes = 9;
+  optional TestMessage2 optional_msg = 10;
+  optional TestEnum optional_enum = 11;
+
+  repeated int32 repeated_int32 = 12;
+  repeated int64 repeated_int64 = 13;
+  repeated uint32 repeated_uint32 = 14;
+  repeated uint64 repeated_uint64 = 15;
+  repeated bool repeated_bool = 16;
+  repeated float repeated_float = 17;
+  repeated double repeated_double = 18;
+  repeated string repeated_string = 19;
+  repeated bytes repeated_bytes = 20;
+  repeated TestMessage2 repeated_msg = 21;
+  repeated TestEnum repeated_enum = 22;
+}
+
+message TestMessage2 {
+  optional int32 foo = 1;
+}
+
+// Mirrors the singular fields of TestMessage (field 11 uses TestNonZeroEnum
+// instead of TestEnum), with an explicit default on every scalar and enum
+// field. optional_msg declares no default.
+message TestMessageDefaults {
+  optional int32 optional_int32 = 1 [default = 1];
+  optional int64 optional_int64 = 2 [default = 2];
+  optional uint32 optional_uint32 = 3 [default = 3];
+  optional uint64 optional_uint64 = 4 [default = 4];
+  optional bool optional_bool = 5 [default = true];
+  optional float optional_float = 6 [default = 6];
+  optional double optional_double = 7 [default = 7];
+  optional string optional_string = 8 [default = "Default Str"];
+  // Default contains bytes that are not valid UTF-8.
+  optional bytes optional_bytes = 9 [default = "\xCF\xA5s\xBD\xBA\xE6fubar"];
+  optional TestMessage2 optional_msg = 10;
+  optional TestNonZeroEnum optional_enum = 11 [default = B2];
+}
+
+enum TestEnum {
+  Default = 0;
+  A = 1;
+  B = 2;
+  C = 3;
+}
+
+// Enum with no zero value: numbering starts at 1. Used as the type of
+// TestMessageDefaults.optional_enum, whose declared default is B2.
+enum TestNonZeroEnum {
+  A2 = 1;
+  B2 = 2;
+  C2 = 3;
+}
+
+message TestEmbeddedMessageParent {
+  optional TestEmbeddedMessageChild child_msg = 1;
+  optional int32 number = 2;
+
+  repeated TestEmbeddedMessageChild repeated_msg = 3;
+  repeated int32 repeated_number = 4;
+}
+
+message TestEmbeddedMessageChild {
+  optional TestMessage sub_child = 1;
+}
+
+message Recursive1 {
+  optional Recursive2 foo = 1;
+}
+
+message Recursive2 {
+  optional Recursive1 foo = 1;
+}
+
+message MapMessageWireEquiv {
+  repeated MapMessageWireEquiv_entry1 map_string_int32 = 1;
+  repeated MapMessageWireEquiv_entry2 map_string_msg = 2;
+}
+
+message MapMessageWireEquiv_entry1 {
+  optional string key = 1;
+  optional int32 value = 2;
+}
+
+message MapMessageWireEquiv_entry2 {
+  optional string key = 1;
+  optional TestMessage2 value = 2;
+}
+
+message OneofMessage {
+  oneof my_oneof {
+    string a = 1;
+    int32 b = 2;
+    TestMessage2 c = 3;
+    TestEnum d = 4;
+  }
+}

+ 1117 - 0
ruby/tests/common_tests.rb

@@ -0,0 +1,1117 @@
+# Defines tests which are common between proto2 and proto3 syntax.
+#
+# Requires that the proto messages are defined identically in proto2 and proto3
+# syntax, and that the including class defines a 'proto_module' method which
+# returns the module enclosing the generated proto message classes.
+module CommonTests
+  def test_defaults
+    m = proto_module::TestMessage.new
+    assert m.optional_int32 == 0
+    assert m.optional_int64 == 0
+    assert m.optional_uint32 == 0
+    assert m.optional_uint64 == 0
+    assert m.optional_bool == false
+    assert m.optional_float == 0.0
+    assert m.optional_double == 0.0
+    assert m.optional_string == ""
+    assert m.optional_bytes == ""
+    assert m.optional_msg == nil
+    assert m.optional_enum == :Default
+  end
+
+  # Round-trips a value through the setter and getter of every singular field
+  # type, including the conversions the setters accept (Symbol for a string
+  # field, String for an enum field) and clearing a sub-message with nil.
+  def test_setters
+    m = proto_module::TestMessage.new
+    m.optional_int32 = -42
+    assert m.optional_int32 == -42
+    m.optional_int64 = -0x1_0000_0000
+    assert m.optional_int64 == -0x1_0000_0000
+    m.optional_uint32 = 0x9000_0000
+    assert m.optional_uint32 == 0x9000_0000
+    m.optional_uint64 = 0x9000_0000_0000_0000
+    assert m.optional_uint64 == 0x9000_0000_0000_0000
+    m.optional_bool = true
+    assert m.optional_bool == true
+    m.optional_float = 0.5
+    assert m.optional_float == 0.5
+    m.optional_double = 0.5
+    assert m.optional_double == 0.5
+    m.optional_string = "hello"
+    assert m.optional_string == "hello"
+    # A Symbol assigned to a string field reads back as a String.
+    m.optional_string = :hello
+    assert m.optional_string == "hello"
+    m.optional_bytes = "world".encode!('ASCII-8BIT')
+    assert m.optional_bytes == "world"
+    m.optional_msg = proto_module::TestMessage2.new(:foo => 42)
+    assert m.optional_msg == proto_module::TestMessage2.new(:foo => 42)
+    # Assigning nil clears the sub-message.
+    m.optional_msg = nil
+    assert m.optional_msg == nil
+    m.optional_enum = :C
+    assert m.optional_enum == :C
+    # An enum field also accepts the value name as a String; it reads back as
+    # a Symbol.
+    m.optional_enum = 'C'
+    assert m.optional_enum == :C
+  end
+
+  def test_ctor_args
+    m = proto_module::TestMessage.new(:optional_int32 => -42,
+                                      :optional_msg => proto_module::TestMessage2.new,
+                                      :optional_enum => :C,
+                                      :repeated_string => ["hello", "there", "world"])
+    assert m.optional_int32 == -42
+    assert m.optional_msg.class == proto_module::TestMessage2
+    assert m.repeated_string.length == 3
+    assert m.optional_enum == :C
+    assert m.repeated_string[0] == "hello"
+    assert m.repeated_string[1] == "there"
+    assert m.repeated_string[2] == "world"
+  end
+
+  def test_ctor_string_symbol_args
+    m = proto_module::TestMessage.new(:optional_enum => 'C', :repeated_enum => ['A', 'B'])
+    assert_equal :C, m.optional_enum
+    assert_equal [:A, :B], m.repeated_enum
+
+    m = proto_module::TestMessage.new(:optional_string => :foo, :repeated_string => [:foo, :bar])
+    assert_equal 'foo', m.optional_string
+    assert_equal ['foo', 'bar'], m.repeated_string
+  end
+
+  def test_ctor_nil_args
+    m = proto_module::TestMessage.new(:optional_enum => nil, :optional_int32 => nil, :optional_string => nil, :optional_msg => nil)
+
+    assert_equal :Default, m.optional_enum
+    assert_equal 0, m.optional_int32
+    assert_equal "", m.optional_string
+    assert_nil m.optional_msg
+  end
+
+  def test_embeddedmsg_hash_init
+    m = proto_module::TestEmbeddedMessageParent.new(
+      :child_msg => {sub_child: {optional_int32: 1}},
+      :number => 2,
+      :repeated_msg => [{sub_child: {optional_int32: 3}}],
+      :repeated_number => [10, 20, 30])
+
+    assert_equal 2, m.number
+    assert_equal [10, 20, 30], m.repeated_number
+
+    assert_not_nil m.child_msg
+    assert_not_nil m.child_msg.sub_child
+    assert_equal m.child_msg.sub_child.optional_int32, 1
+
+    assert_not_nil m.repeated_msg
+    assert_equal 1, m.repeated_msg.length
+    assert_equal 3, m.repeated_msg.first.sub_child.optional_int32
+  end
+
+  # Pins the exact #inspect output: every field is listed in declaration
+  # order, including fields still at their default, and for a oneof message
+  # all members are listed with the unset ones shown at their defaults.
+  def test_inspect
+    m = proto_module::TestMessage.new(
+      :optional_int32 => -42,
+      :optional_enum => :A,
+      :optional_msg => proto_module::TestMessage2.new,
+      :repeated_string => ["hello", "there", "world"])
+    expected = "<#{proto_module}::TestMessage: optional_int32: -42, optional_int64: 0, optional_uint32: 0, optional_uint64: 0, optional_bool: false, optional_float: 0.0, optional_double: 0.0, optional_string: \"\", optional_bytes: \"\", optional_msg: <#{proto_module}::TestMessage2: foo: 0>, optional_enum: :A, repeated_int32: [], repeated_int64: [], repeated_uint32: [], repeated_uint64: [], repeated_bool: [], repeated_float: [], repeated_double: [], repeated_string: [\"hello\", \"there\", \"world\"], repeated_bytes: [], repeated_msg: [], repeated_enum: []>"
+    assert_equal expected, m.inspect
+
+    m = proto_module::OneofMessage.new(:b => -42)
+    expected = "<#{proto_module}::OneofMessage: a: \"\", b: -42, c: nil, d: :Default>"
+    assert_equal expected, m.inspect
+  end
+
+  def test_hash
+    m1 = proto_module::TestMessage.new(:optional_int32 => 42)
+    m2 = proto_module::TestMessage.new(:optional_int32 => 102, repeated_string: ['please', 'work', 'ok?'])
+    m3 = proto_module::TestMessage.new(:optional_int32 => 102, repeated_string: ['please', 'work', 'ok?'])
+    assert m1.hash != 0
+    assert m2.hash != 0
+    assert m3.hash != 0
+    # relying on the randomness here -- if hash function changes and we are
+    # unlucky enough to get a collision, then change the values above.
+    assert m1.hash != m2.hash
+    assert_equal m2.hash, m3.hash
+  end
+
+  def test_unknown_field_errors
+    e = assert_raise NoMethodError do
+      proto_module::TestMessage.new.hello
+    end
+    assert_match(/hello/, e.message)
+
+    e = assert_raise NoMethodError do
+      proto_module::TestMessage.new.hello = "world"
+    end
+    assert_match(/hello/, e.message)
+  end
+
+  # Assigning or pushing a value of the wrong type raises
+  # Google::Protobuf::TypeError for int32, string, bool, sub-message and
+  # repeated fields.
+  def test_type_errors
+    m = proto_module::TestMessage.new
+    e = assert_raise Google::Protobuf::TypeError do
+      m.optional_int32 = "hello"
+    end
+
+    # Google::Protobuf::TypeError should inherit from TypeError for backwards compatibility
+    # TODO: This can be removed when we can safely migrate to Google::Protobuf::TypeError
+    assert_true e.is_a?(::TypeError)
+
+    assert_raise Google::Protobuf::TypeError do
+      m.optional_string = 42
+    end
+    # nil is rejected for a string field.
+    assert_raise Google::Protobuf::TypeError do
+      m.optional_string = nil
+    end
+    assert_raise Google::Protobuf::TypeError do
+      m.optional_bool = 42
+    end
+    assert_raise Google::Protobuf::TypeError do
+      m.optional_msg = proto_module::TestMessage.new  # expects TestMessage2
+    end
+
+    assert_raise Google::Protobuf::TypeError do
+      m.repeated_int32 = []  # needs RepeatedField
+    end
+
+    assert_raise Google::Protobuf::TypeError do
+      m.repeated_int32.push "hello"
+    end
+
+    # repeated_msg holds TestMessage2, so a TestMessage element is rejected.
+    assert_raise Google::Protobuf::TypeError do
+      m.repeated_msg.push proto_module::TestMessage.new
+    end
+  end
+
+  # Exercises encoding handling of string and bytes fields: which assignments
+  # are converted, which raise Encoding::UndefinedConversionError, and that
+  # stored strings are frozen.
+  def test_string_encoding
+    m = proto_module::TestMessage.new
+
+    # Assigning a normal (ASCII or UTF8) string to a bytes field, or
+    # ASCII-8BIT to a string field will convert to the proper encoding.
+    m.optional_bytes = "Test string ASCII".encode!('ASCII')
+    assert m.optional_bytes.frozen?
+    assert_equal Encoding::ASCII_8BIT, m.optional_bytes.encoding
+    assert_equal "Test string ASCII", m.optional_bytes
+
+    # A UTF-8 string containing a non-ASCII character cannot be converted for
+    # a bytes field.
+    assert_raise Encoding::UndefinedConversionError do
+      m.optional_bytes = "Test string UTF-8 \u0100".encode!('UTF-8')
+    end
+
+    # Raw 0xFF 0xFF bytes cannot be converted for a string field.
+    assert_raise Encoding::UndefinedConversionError do
+      m.optional_string = ["FFFF"].pack('H*')
+    end
+
+    # "Ordinary" use case.
+    m.optional_bytes = ["FFFF"].pack('H*')
+    m.optional_string = "\u0100"
+
+    # strings are immutable so we can't do this, but serialize should catch it.
+    m.optional_string = "asdf".encode!('UTF-8')
+    # Ruby 2.5 changed this to raise FrozenError. However, assert_raise does
+    # not accept subclasses, so no exception type is specified here.
+    assert_raise do
+      m.optional_string.encode!('ASCII-8BIT')
+    end
+  end
+
+  # Walks a RepeatedField of :int32 through its Array-like API: construction,
+  # equality with Arrays in both directions, push/pop, indexing, dup, concat,
+  # `+`, clear/replace, out-of-range assignment and `<<` chaining.
+  def test_rptfield_int32
+    l = Google::Protobuf::RepeatedField.new(:int32)
+    assert l.count == 0
+    l = Google::Protobuf::RepeatedField.new(:int32, [1, 2, 3])
+    assert l.count == 3
+    # Equality with a plain Array is checked with either operand on the left.
+    assert_equal [1, 2, 3], l
+    assert_equal l, [1, 2, 3]
+    l.push 4
+    assert l == [1, 2, 3, 4]
+    dst_list = []
+    l.each { |val| dst_list.push val }
+    assert dst_list == [1, 2, 3, 4]
+    assert l.to_a == [1, 2, 3, 4]
+    assert l[0] == 1
+    assert l[3] == 4
+    l[0] = 5
+    assert l == [5, 2, 3, 4]
+
+    # dup gives an independent list: pushing to the copy leaves the original
+    # unchanged.
+    l2 = l.dup
+    assert l == l2
+    assert l.object_id != l2.object_id
+    l2.push 6
+    assert l.count == 4
+    assert l2.count == 5
+
+    assert l.inspect == '[5, 2, 3, 4]'
+
+    l.concat([7, 8, 9])
+    assert l == [5, 2, 3, 4, 7, 8, 9]
+    assert l.pop == 9
+    assert l == [5, 2, 3, 4, 7, 8]
+
+    # A message is not a valid element of an :int32 list.
+    assert_raise Google::Protobuf::TypeError do
+      m = proto_module::TestMessage.new
+      l.push m
+    end
+
+    # Assigning the list to a message field stores the same object, so a push
+    # on `l` is visible through the message.
+    m = proto_module::TestMessage.new
+    m.repeated_int32 = l
+    assert m.repeated_int32 == [5, 2, 3, 4, 7, 8]
+    assert m.repeated_int32.object_id == l.object_id
+    l.push 42
+    assert m.repeated_int32.pop == 42
+
+    # `+` produces a list holding the elements of both operands in order.
+    l3 = l + l.dup
+    assert l3.count == l.count * 2
+    l.count.times do |i|
+      assert l3[i] == l[i]
+      assert l3[l.count + i] == l[i]
+    end
+
+    l.clear
+    assert l.count == 0
+    # `+=` rebinds the local `l` to the result of `+`.
+    l += [1, 2, 3, 4]
+    l.replace([5, 6, 7, 8])
+    assert l == [5, 6, 7, 8]
+
+    # Assigning past the end fills the gap with 0.
+    l4 = Google::Protobuf::RepeatedField.new(:int32)
+    l4[5] = 42
+    assert l4 == [0, 0, 0, 0, 0, 42]
+
+    # `<<` appends and can be chained.
+    l4 << 100
+    assert l4 == [0, 0, 0, 0, 0, 42, 100]
+    l4 << 101 << 102
+    assert l4 == [0, 0, 0, 0, 0, 42, 100, 101, 102]
+  end
+
+  # A repeated field accessed through its parent message starts out empty and
+  # can be appended to with <<, push and +=.
+  def test_parent_rptfield
+    # Make sure we set the RepeatedField and can add to it.
+    m = proto_module::TestMessage.new
+    assert m.repeated_string == []
+    m.repeated_string << 'ok'
+    m.repeated_string.push('ok2')
+    assert m.repeated_string == ['ok', 'ok2']
+    m.repeated_string += ['ok3']
+    assert m.repeated_string == ['ok', 'ok2', 'ok3']
+  end
+
+  # RepeatedField of messages: element type checking, shallow dup versus
+  # Google::Protobuf.deep_copy, and `+` keeping references to the original
+  # element objects.
+  def test_rptfield_msg
+    l = Google::Protobuf::RepeatedField.new(:message, proto_module::TestMessage)
+    l.push proto_module::TestMessage.new
+    assert l.count == 1
+    assert_raise Google::Protobuf::TypeError do
+      l.push proto_module::TestMessage2.new
+    end
+    assert_raise Google::Protobuf::TypeError do
+      l.push 42
+    end
+
+    # dup is shallow: the copy holds the same element object.
+    l2 = l.dup
+    assert l2[0] == l[0]
+    assert l2[0].object_id == l[0].object_id
+
+    # deep_copy yields an equal but distinct element object.
+    l2 = Google::Protobuf.deep_copy(l)
+    assert l2[0] == l[0]
+    assert l2[0].object_id != l[0].object_id
+
+    # Elements of a `+` result are the operands' own objects, so a change made
+    # through l3 shows up in l.
+    l3 = l + l2
+    assert l3.count == 2
+    assert l3[0] == l[0]
+    assert l3[1] == l2[0]
+    l3[0].optional_int32 = 1000
+    assert l[0].optional_int32 == 1000
+
+    # The same holds when the right-hand operand is a plain Array.
+    new_msg = proto_module::TestMessage.new(:optional_int32 => 200)
+    l4 = l + [new_msg]
+    assert l4.count == 2
+    new_msg.optional_int32 = 1000
+    assert l4[1].optional_int32 == 1000
+  end
+
+  # RepeatedField of enums: known symbols are accepted, an unknown symbol
+  # raises RangeError, and an integer without a named value is stored and read
+  # back as an integer.
+  def test_rptfield_enum
+    l = Google::Protobuf::RepeatedField.new(:enum, proto_module::TestEnum)
+    l.push :A
+    l.push :B
+    l.push :C
+    assert l.count == 3
+    assert_raise RangeError do
+      l.push :D
+    end
+    assert l[0] == :A
+
+    # 4 has no name in TestEnum.
+    l.push 4
+    assert l[3] == 4
+  end
+
+  def test_rptfield_initialize
+    assert_raise ArgumentError do
+      l = Google::Protobuf::RepeatedField.new
+    end
+    assert_raise ArgumentError do
+      l = Google::Protobuf::RepeatedField.new(:message)
+    end
+    assert_raise ArgumentError do
+      l = Google::Protobuf::RepeatedField.new([1, 2, 3])
+    end
+    assert_raise ArgumentError do
+      l = Google::Protobuf::RepeatedField.new(:message, [proto_module::TestMessage2.new])
+    end
+  end
+
+  def test_rptfield_array_ducktyping
+    l = Google::Protobuf::RepeatedField.new(:int32)
+    length_methods = %w(count length size)
+    length_methods.each do |lm|
+      assert l.send(lm)  == 0
+    end
+    # out of bounds returns a nil
+    assert l[0] == nil
+    assert l[1] == nil
+    assert l[-1] == nil
+    l.push 4
+    length_methods.each do |lm|
+      assert l.send(lm) == 1
+    end
+    assert l[0] == 4
+    assert l[1] == nil
+    assert l[-1] == 4
+    assert l[-2] == nil
+
+    l.push 2
+    length_methods.each do |lm|
+      assert l.send(lm) == 2
+    end
+    assert l[0] == 4
+    assert l[1] == 2
+    assert l[2] == nil
+    assert l[-1] == 2
+    assert l[-2] == 4
+    assert l[-3] == nil
+
+    #adding out of scope will backfill with empty objects
+  end
+
+  # Basic Map behaviour with string keys and int32 values: insert, lookup,
+  # equality with a Hash, dup/hash, iteration, delete, inspect, keys/values,
+  # clear, and the errors raised for a bad key type or an out-of-range value.
+  def test_map_basic
+    # allowed key types:
+    # :int32, :int64, :uint32, :uint64, :bool, :string, :bytes.
+
+    m = Google::Protobuf::Map.new(:string, :int32)
+    m["asdf"] = 1
+    assert m["asdf"] == 1
+    m["jkl;"] = 42
+    assert m == { "jkl;" => 42, "asdf" => 1 }
+    assert m.has_key?("asdf")
+    assert !m.has_key?("qwerty")
+    assert m.length == 2
+
+    m2 = m.dup
+    assert_equal m, m2
+    assert m.hash != 0
+    assert_equal m.hash, m2.hash
+
+    collected = {}
+    m.each { |k,v| collected[v] = k }
+    assert collected == { 42 => "jkl;", 1 => "asdf" }
+
+    # delete returns the removed value. The has_key? check is repeated after
+    # the lookup to show that reading a missing key returns nil without
+    # creating the key.
+    assert m.delete("asdf") == 1
+    assert !m.has_key?("asdf")
+    assert m["asdf"] == nil
+    assert !m.has_key?("asdf")
+
+    # We only assert on inspect value when there is one map entry because the
+    # order in which elements appear is unspecified (depends on the internal
+    # hash function). We don't want a brittle test.
+    assert m.inspect == "{\"jkl;\"=>42}"
+
+    assert m.keys == ["jkl;"]
+    assert m.values == [42]
+
+    m.clear
+    assert m.length == 0
+    assert m == {}
+
+    # Integer key on a string-keyed map.
+    assert_raise TypeError do
+      m[1] = 1
+    end
+    # Value does not fit in int32.
+    assert_raise RangeError do
+      m["asdf"] = 0x1_0000_0000
+    end
+  end
+
+  def test_map_ctor
+    m = Google::Protobuf::Map.new(:string, :int32,
+                                  {"a" => 1, "b" => 2, "c" => 3})
+    assert m == {"a" => 1, "c" => 3, "b" => 2}
+  end
+
+  # For each allowed key type, checks which keys are accepted and which error
+  # is raised for out-of-range keys (RangeError) or keys of the wrong type.
+  def test_map_keytypes
+    # :int32 keys
+    m = Google::Protobuf::Map.new(:int32, :int32)
+    m[1] = 42
+    m[-1] = 42
+    assert_raise RangeError do
+      m[0x8000_0000] = 1
+    end
+    assert_raise Google::Protobuf::TypeError do
+      m["asdf"] = 1
+    end
+
+    # :int64 keys
+    m = Google::Protobuf::Map.new(:int64, :int32)
+    m[0x1000_0000_0000_0000] = 1
+    assert_raise RangeError do
+      m[0x1_0000_0000_0000_0000] = 1
+    end
+    assert_raise Google::Protobuf::TypeError do
+      m["asdf"] = 1
+    end
+
+    # :uint32 keys -- negative keys are out of range
+    m = Google::Protobuf::Map.new(:uint32, :int32)
+    m[0x8000_0000] = 1
+    assert_raise RangeError do
+      m[0x1_0000_0000] = 1
+    end
+    assert_raise RangeError do
+      m[-1] = 1
+    end
+
+    # :uint64 keys
+    m = Google::Protobuf::Map.new(:uint64, :int32)
+    m[0x8000_0000_0000_0000] = 1
+    assert_raise RangeError do
+      m[0x1_0000_0000_0000_0000] = 1
+    end
+    assert_raise RangeError do
+      m[-1] = 1
+    end
+
+    # :bool keys -- only true and false are accepted
+    m = Google::Protobuf::Map.new(:bool, :int32)
+    m[true] = 1
+    m[false] = 2
+    assert_raise Google::Protobuf::TypeError do
+      m[1] = 1
+    end
+    assert_raise Google::Protobuf::TypeError do
+      m["asdf"] = 1
+    end
+
+    # :string keys -- a byte string that cannot be converted is rejected
+    m = Google::Protobuf::Map.new(:string, :int32)
+    m["asdf"] = 1
+    assert_raise TypeError do
+      m[1] = 1
+    end
+    assert_raise Encoding::UndefinedConversionError do
+      bytestring = ["FFFF"].pack("H*")
+      m[bytestring] = 1
+    end
+
+    # :bytes keys
+    m = Google::Protobuf::Map.new(:bytes, :int32)
+    bytestring = ["FFFF"].pack("H*")
+    m[bytestring] = 1
+    # Allowed -- we will automatically convert to ASCII-8BIT.
+    m["asdf"] = 1
+    assert_raise TypeError do
+      m[1] = 1
+    end
+  end
+
+  # Maps whose values are messages or enums: value type checking,
+  # construction from a Hash, and enum values given as symbol, known integer,
+  # unknown integer, unknown symbol and string.
+  def test_map_msg_enum_valuetypes
+    m = Google::Protobuf::Map.new(:string, :message, proto_module::TestMessage)
+    m["asdf"] = proto_module::TestMessage.new
+    assert_raise Google::Protobuf::TypeError do
+      m["jkl;"] = proto_module::TestMessage2.new
+    end
+
+    m = Google::Protobuf::Map.new(
+      :string, :message, proto_module::TestMessage,
+      { "a" => proto_module::TestMessage.new(:optional_int32 => 42),
+        "b" => proto_module::TestMessage.new(:optional_int32 => 84) })
+    assert m.length == 2
+    # Sorted because the order of values is not relied upon.
+    assert m.values.map{|msg| msg.optional_int32}.sort == [42, 84]
+
+    m = Google::Protobuf::Map.new(:string, :enum, proto_module::TestEnum,
+                                  { "x" => :A, "y" => :B, "z" => :C })
+    assert m.length == 3
+    assert m["z"] == :C
+    # An integer with a named value reads back as the symbol ...
+    m["z"] = 2
+    assert m["z"] == :B
+    # ... while one without a name reads back as the integer.
+    m["z"] = 4
+    assert m["z"] == 4
+    assert_raise RangeError do
+      m["z"] = :Z
+    end
+    assert_raise RangeError do
+      m["z"] = "z"
+    end
+  end
+
+  # Map#dup is shallow (new map, same value objects), whereas
+  # Google::Protobuf.deep_copy also copies the message values.
+  def test_map_dup_deep_copy
+    m = Google::Protobuf::Map.new(
+      :string, :message, proto_module::TestMessage,
+      { "a" => proto_module::TestMessage.new(:optional_int32 => 42),
+        "b" => proto_module::TestMessage.new(:optional_int32 => 84) })
+
+    # Shallow copy: distinct map, identical value objects.
+    m2 = m.dup
+    assert m == m2
+    assert m.object_id != m2.object_id
+    assert m["a"].object_id == m2["a"].object_id
+    assert m["b"].object_id == m2["b"].object_id
+
+    # Deep copy: distinct map and distinct value objects.
+    m2 = Google::Protobuf.deep_copy(m)
+    assert m == m2
+    assert m.object_id != m2.object_id
+    assert m["a"].object_id != m2["a"].object_id
+    assert m["b"].object_id != m2["b"].object_id
+  end
+
+  # Reflection on a oneof: lookup by name, enumeration through each_oneof,
+  # and enumeration of the oneof's member fields.
+  def test_oneof_descriptors
+    d = proto_module::OneofMessage.descriptor
+    o = d.lookup_oneof("my_oneof")
+    assert o != nil
+    assert o.class == Google::Protobuf::OneofDescriptor
+    assert o.name == "my_oneof"
+    # The message declares exactly one oneof, and it is the one looked up
+    # above.
+    oneof_count = 0
+    d.each_oneof{ |oneof|
+      oneof_count += 1
+      assert oneof == o
+    }
+    assert oneof_count == 1
+    assert o.count == 4
+    field_names = o.map{|f| f.name}.sort
+    assert field_names == ["a", "b", "c", "d"]
+  end
+
+  # Oneof semantics: at most one member is set at a time, the oneof accessor
+  # reports which one, unset members read as their defaults, and on decode the
+  # last member on the wire wins.
+  def test_oneof
+    # Nothing set: every member reads as its default and my_oneof is nil.
+    d = proto_module::OneofMessage.new
+    assert d.a == ""
+    assert d.b == 0
+    assert d.c == nil
+    assert d.d == :Default
+    assert d.my_oneof == nil
+
+    d.a = "hi"
+    assert d.a == "hi"
+    assert d.b == 0
+    assert d.c == nil
+    assert d.d == :Default
+    assert d.my_oneof == :a
+
+    # Setting another member resets the previously set one.
+    d.b = 42
+    assert d.a == ""
+    assert d.b == 42
+    assert d.c == nil
+    assert d.d == :Default
+    assert d.my_oneof == :b
+
+    d.c = proto_module::TestMessage2.new(:foo => 100)
+    assert d.a == ""
+    assert d.b == 0
+    assert d.c.foo == 100
+    assert d.d == :Default
+    assert d.my_oneof == :c
+
+    d.d = :C
+    assert d.a == ""
+    assert d.b == 0
+    assert d.c == nil
+    assert d.d == :C
+    assert d.my_oneof == :d
+
+    d2 = proto_module::OneofMessage.decode(proto_module::OneofMessage.encode(d))
+    assert d2 == d
+
+    # Encode each member on its own so the encodings can be concatenated into
+    # a single payload carrying several oneof members.
+    encoded_field_a = proto_module::OneofMessage.encode(proto_module::OneofMessage.new(:a => "string"))
+    encoded_field_b = proto_module::OneofMessage.encode(proto_module::OneofMessage.new(:b => 1000))
+    encoded_field_c = proto_module::OneofMessage.encode(
+      proto_module::OneofMessage.new(:c => proto_module::TestMessage2.new(:foo => 1)))
+    encoded_field_d = proto_module::OneofMessage.encode(proto_module::OneofMessage.new(:d => :B))
+
+    # Last member in the payload is d, so only d is set after decoding.
+    d3 = proto_module::OneofMessage.decode(
+      encoded_field_c + encoded_field_a + encoded_field_d)
+    assert d3.a == ""
+    assert d3.b == 0
+    assert d3.c == nil
+    assert d3.d == :B
+
+    # Last member in the payload is c, so only c is set after decoding.
+    d4 = proto_module::OneofMessage.decode(
+      encoded_field_c + encoded_field_a + encoded_field_d +
+      encoded_field_c)
+    assert d4.a == ""
+    assert d4.b == 0
+    assert d4.c.foo == 1
+    assert d4.d == :Default
+
+    # Assigning nil clears the oneof: nothing is serialized and my_oneof is
+    # nil again.
+    d5 = proto_module::OneofMessage.new(:a => "hello")
+    assert d5.a == "hello"
+    d5.a = nil
+    assert d5.a == ""
+    assert proto_module::OneofMessage.encode(d5) == ''
+    assert d5.my_oneof == nil
+  end
+
+  # Enum field accessors: symbols are accepted and returned, an unknown symbol
+  # raises RangeError, a known integer reads back as its symbol and an unknown
+  # integer reads back as the integer.
+  def test_enum_field
+    m = proto_module::TestMessage.new
+    assert m.optional_enum == :Default
+    m.optional_enum = :A
+    assert m.optional_enum == :A
+    assert_raise RangeError do
+      m.optional_enum = :ASDF
+    end
+    m.optional_enum = 1
+    assert m.optional_enum == :A
+    # 100 has no name in TestEnum.
+    m.optional_enum = 100
+    assert m.optional_enum == 100
+  end
+
+  # Message#dup is shallow: the copy compares equal, scalar changes made
+  # afterwards to the original do not affect it, and repeated sub-messages are
+  # the same objects in both.
+  def test_dup
+    m = proto_module::TestMessage.new
+    m.optional_string = "hello"
+    m.optional_int32 = 42
+    tm1 = proto_module::TestMessage2.new(:foo => 100)
+    tm2 = proto_module::TestMessage2.new(:foo => 200)
+    m.repeated_msg.push tm1
+    assert m.repeated_msg[-1] == tm1
+    m.repeated_msg.push tm2
+    assert m.repeated_msg[-1] == tm2
+    m2 = m.dup
+    assert m == m2
+    # Changing a scalar on the original makes the two messages differ.
+    m.optional_int32 += 1
+    assert m != m2
+    assert m.repeated_msg[0] == m2.repeated_msg[0]
+    assert m.repeated_msg[0].object_id == m2.repeated_msg[0].object_id
+  end
+
+  def test_deep_copy
+    m = proto_module::TestMessage.new(:optional_int32 => 42,
+                                      :repeated_msg => [proto_module::TestMessage2.new(:foo => 100)])
+    m2 = Google::Protobuf.deep_copy(m)
+    assert m == m2
+    assert m.repeated_msg == m2.repeated_msg
+    assert m.repeated_msg.object_id != m2.repeated_msg.object_id
+    assert m.repeated_msg[0].object_id != m2.repeated_msg[0].object_id
+  end
+
+  def test_eq
+    m = proto_module::TestMessage.new(:optional_int32 => 42,
+                                      :repeated_int32 => [1, 2, 3])
+    m2 = proto_module::TestMessage.new(:optional_int32 => 43,
+                                       :repeated_int32 => [1, 2, 3])
+    assert m != m2
+  end
+
+  def test_enum_lookup
+    assert proto_module::TestEnum::A == 1
+    assert proto_module::TestEnum::B == 2
+    assert proto_module::TestEnum::C == 3
+
+    assert proto_module::TestEnum::lookup(1) == :A
+    assert proto_module::TestEnum::lookup(2) == :B
+    assert proto_module::TestEnum::lookup(3) == :C
+
+    assert proto_module::TestEnum::resolve(:A) == 1
+    assert proto_module::TestEnum::resolve(:B) == 2
+    assert proto_module::TestEnum::resolve(:C) == 3
+  end
+
+  # Binary round trip through the class-level encode/decode and through the
+  # Google::Protobuf.encode/decode module functions. The repeated enum field
+  # includes a value (100) that has no name in TestEnum.
+  def test_parse_serialize
+    m = proto_module::TestMessage.new(:optional_int32 => 42,
+                                      :optional_string => "hello world",
+                                      :optional_enum => :B,
+                                      :repeated_string => ["a", "b", "c"],
+                                      :repeated_int32 => [42, 43, 44],
+                                      :repeated_enum => [:A, :B, :C, 100],
+                                      :repeated_msg => [proto_module::TestMessage2.new(:foo => 1),
+                                                        proto_module::TestMessage2.new(:foo => 2)])
+    data = proto_module::TestMessage.encode m
+    m2 = proto_module::TestMessage.decode data
+    assert m == m2
+
+    data = Google::Protobuf.encode m
+    m2 = Google::Protobuf.decode(proto_module::TestMessage, data)
+    assert m == m2
+  end
+
+  # Instance helpers to_json / to_proto round-trip through the class-level
+  # decode_json / decode.
+  def test_encode_decode_helpers
+    m = proto_module::TestMessage.new(:optional_string => 'foo', :repeated_string => ['bar1', 'bar2'])
+    assert_equal 'foo', m.optional_string
+    assert_equal ['bar1', 'bar2'], m.repeated_string
+
+    json = m.to_json
+    m2 = proto_module::TestMessage.decode_json(json)
+    assert_equal 'foo', m2.optional_string
+    assert_equal ['bar1', 'bar2'], m2.repeated_string
+    # Frozen-ness of the decoded strings is only asserted when not running on
+    # JRuby.
+    if RUBY_PLATFORM != "java"
+      assert m2.optional_string.frozen?
+      assert m2.repeated_string[0].frozen?
+    end
+
+    proto = m.to_proto
+    m2 = proto_module::TestMessage.decode(proto)
+    assert_equal 'foo', m2.optional_string
+    assert_equal ['bar1', 'bar2'], m2.repeated_string
+  end
+
+  def test_protobuf_encode_decode_helpers
+    m = proto_module::TestMessage.new(:optional_string => 'foo', :repeated_string => ['bar1', 'bar2'])
+    encoded_msg = Google::Protobuf.encode(m)
+    assert_equal m.to_proto, encoded_msg
+
+    decoded_msg = Google::Protobuf.decode(proto_module::TestMessage, encoded_msg)
+    assert_equal proto_module::TestMessage.decode(m.to_proto), decoded_msg
+  end
+
+  def test_protobuf_encode_decode_json_helpers
+    m = proto_module::TestMessage.new(:optional_string => 'foo', :repeated_string => ['bar1', 'bar2'])
+    encoded_msg = Google::Protobuf.encode_json(m)
+    assert_equal m.to_json, encoded_msg
+
+    decoded_msg = Google::Protobuf.decode_json(proto_module::TestMessage, encoded_msg)
+    assert_equal proto_module::TestMessage.decode_json(m.to_json), decoded_msg
+  end
+
+  # Building invalid definitions directly in a fresh DescriptorPool (without
+  # add_file) raises Google::Protobuf::TypeError.
+  def test_def_errors
+    s = Google::Protobuf::DescriptorPool.new
+    assert_raise Google::Protobuf::TypeError do
+      s.build do
+        # enum with no default (integer value 0)
+        add_enum "MyEnum" do
+          value :A, 1
+        end
+      end
+    end
+    assert_raise Google::Protobuf::TypeError do
+      s.build do
+        # message with required field (unsupported in proto3)
+        add_message "MyMessage" do
+          required :foo, :int32, 1
+        end
+      end
+    end
+  end
+
+  # Mutually recursive message types can be instantiated, their descriptors
+  # refer to each other, and a nested instance survives a binary round trip.
+  def test_corecursive
+    # just be sure that we can instantiate types with corecursive field-type
+    # references.
+    m = proto_module::Recursive1.new(:foo => proto_module::Recursive2.new(:foo => proto_module::Recursive1.new))
+    assert proto_module::Recursive1.descriptor.lookup("foo").subtype ==
+           proto_module::Recursive2.descriptor
+    assert proto_module::Recursive2.descriptor.lookup("foo").subtype ==
+           proto_module::Recursive1.descriptor
+
+    serialized = proto_module::Recursive1.encode(m)
+    m2 = proto_module::Recursive1.decode(serialized)
+    assert m == m2
+  end
+
+  def test_serialize_cycle
+    m = proto_module::Recursive1.new(:foo => proto_module::Recursive2.new)
+    m.foo.foo = m
+    assert_raise RuntimeError do
+      serialized = proto_module::Recursive1.encode(m)
+    end
+  end
+
+  # Fields whose names collide with Ruby methods (dup, class) or contain a dot
+  # remain reachable through [] and []=, and #dup still copies the message.
+  def test_bad_field_names
+    m = proto_module::BadFieldNames.new(:dup => 1, :class => 2)
+    # dup here is the copy method, not the field named "dup".
+    m2 = m.dup
+    assert m == m2
+    assert m['dup'] == 1
+    assert m['class'] == 2
+    m['dup'] = 3
+    assert m['dup'] == 3
+    m['a.b'] = 4
+    assert m['a.b'] == 4
+  end
+
+  # For each integer field type, checks the accepted boundary values
+  # (including integral Floats) and that out-of-range integers, out-of-range
+  # Floats and non-integral Floats raise RangeError.
+  def test_int_ranges
+    m = proto_module::TestMessage.new
+
+    # int32
+    m.optional_int32 = 0
+    m.optional_int32 = -0x8000_0000
+    m.optional_int32 = +0x7fff_ffff
+    m.optional_int32 = 1.0
+    m.optional_int32 = -1.0
+    m.optional_int32 = 2e9
+    assert_raise RangeError do
+      m.optional_int32 = -0x8000_0001
+    end
+    assert_raise RangeError do
+      m.optional_int32 = +0x8000_0000
+    end
+    assert_raise RangeError do
+      m.optional_int32 = +0x1000_0000_0000_0000_0000_0000 # force Bignum
+    end
+    assert_raise RangeError do
+      m.optional_int32 = 1e12
+    end
+    # A Float with a fractional part is rejected.
+    assert_raise RangeError do
+      m.optional_int32 = 1.5
+    end
+
+    # uint32
+    m.optional_uint32 = 0
+    m.optional_uint32 = +0xffff_ffff
+    m.optional_uint32 = 1.0
+    m.optional_uint32 = 4e9
+    assert_raise RangeError do
+      m.optional_uint32 = -1
+    end
+    assert_raise RangeError do
+      m.optional_uint32 = -1.5
+    end
+    assert_raise RangeError do
+      m.optional_uint32 = -1.5e12
+    end
+    assert_raise RangeError do
+      m.optional_uint32 = -0x1000_0000_0000_0000
+    end
+    assert_raise RangeError do
+      m.optional_uint32 = +0x1_0000_0000
+    end
+    assert_raise RangeError do
+      m.optional_uint32 = +0x1000_0000_0000_0000_0000_0000 # force Bignum
+    end
+    assert_raise RangeError do
+      m.optional_uint32 = 1e12
+    end
+    assert_raise RangeError do
+      m.optional_uint32 = 1.5
+    end
+
+    # int64
+    m.optional_int64 = 0
+    m.optional_int64 = -0x8000_0000_0000_0000
+    m.optional_int64 = +0x7fff_ffff_ffff_ffff
+    m.optional_int64 = 1.0
+    m.optional_int64 = -1.0
+    m.optional_int64 = 8e18
+    m.optional_int64 = -8e18
+    assert_raise RangeError do
+      m.optional_int64 = -0x8000_0000_0000_0001
+    end
+    assert_raise RangeError do
+      m.optional_int64 = +0x8000_0000_0000_0000
+    end
+    assert_raise RangeError do
+      m.optional_int64 = +0x1000_0000_0000_0000_0000_0000 # force Bignum
+    end
+    assert_raise RangeError do
+      m.optional_int64 = 1e50
+    end
+    assert_raise RangeError do
+      m.optional_int64 = 1.5
+    end
+
+    # uint64
+    m.optional_uint64 = 0
+    m.optional_uint64 = +0xffff_ffff_ffff_ffff
+    m.optional_uint64 = 1.0
+    m.optional_uint64 = 16e18
+    assert_raise RangeError do
+      m.optional_uint64 = -1
+    end
+    assert_raise RangeError do
+      m.optional_uint64 = -1.5
+    end
+    assert_raise RangeError do
+      m.optional_uint64 = -1.5e12
+    end
+    assert_raise RangeError do
+      m.optional_uint64 = -0x1_0000_0000_0000_0000
+    end
+    assert_raise RangeError do
+      m.optional_uint64 = +0x1_0000_0000_0000_0000
+    end
+    assert_raise RangeError do
+      m.optional_uint64 = +0x1000_0000_0000_0000_0000_0000 # force Bignum
+    end
+    assert_raise RangeError do
+      m.optional_uint64 = 1e50
+    end
+    assert_raise RangeError do
+      m.optional_uint64 = 1.5
+    end
+  end
+
+  def test_stress_test
+    m = proto_module::TestMessage.new
+    m.optional_int32 = 42
+    m.optional_int64 = 0x100000000
+    m.optional_string = "hello world"
+    10.times do m.repeated_msg.push proto_module::TestMessage2.new(:foo => 42) end
+    10.times do m.repeated_string.push "hello world" end
+
+    data = proto_module::TestMessage.encode(m)
+
+    l = 0
+    10_000.times do
+      m = proto_module::TestMessage.decode(data)
+      data_new = proto_module::TestMessage.encode(m)
+      assert data_new == data
+      data = data_new
+    end
+  end
+
+  def test_reflection
+    m = proto_module::TestMessage.new(:optional_int32 => 1234)
+    msgdef = m.class.descriptor
+    assert msgdef.class == Google::Protobuf::Descriptor
+    assert msgdef.any? {|field| field.name == "optional_int32"}
+    optional_int32 = msgdef.lookup "optional_int32"
+    assert optional_int32.class == Google::Protobuf::FieldDescriptor
+    assert optional_int32 != nil
+    assert optional_int32.name == "optional_int32"
+    assert optional_int32.type == :int32
+    optional_int32.set(m, 5678)
+    assert m.optional_int32 == 5678
+    m.optional_int32 = 1000
+    assert optional_int32.get(m) == 1000
+
+    optional_msg = msgdef.lookup "optional_msg"
+    assert optional_msg.subtype == proto_module::TestMessage2.descriptor
+
+    optional_msg.set(m, optional_msg.subtype.msgclass.new)
+
+    assert msgdef.msgclass == proto_module::TestMessage
+
+    optional_enum = msgdef.lookup "optional_enum"
+    assert optional_enum.subtype == proto_module::TestEnum.descriptor
+    assert optional_enum.subtype.class == Google::Protobuf::EnumDescriptor
+    optional_enum.subtype.each do |k, v|
+      # set with integer, check resolution to symbolic name
+      optional_enum.set(m, v)
+      assert optional_enum.get(m) == k
+    end
+  end
+
+  def test_json
+    # TODO: Fix JSON in JRuby version.
+    return if RUBY_PLATFORM == "java"
+    m = proto_module::TestMessage.new(:optional_int32 => 1234,
+                                      :optional_int64 => -0x1_0000_0000,
+                                      :optional_uint32 => 0x8000_0000,
+                                      :optional_uint64 => 0xffff_ffff_ffff_ffff,
+                                      :optional_bool => true,
+                                      :optional_float => 1.0,
+                                      :optional_double => -1e100,
+                                      :optional_string => "Test string",
+                                      :optional_bytes => ["FFFFFFFF"].pack('H*'),
+                                      :optional_msg => proto_module::TestMessage2.new(:foo => 42),
+                                      :repeated_int32 => [1, 2, 3, 4],
+                                      :repeated_string => ["a", "b", "c"],
+                                      :repeated_bool => [true, false, true, false],
+                                      :repeated_msg => [proto_module::TestMessage2.new(:foo => 1),
+                                                        proto_module::TestMessage2.new(:foo => 2)])
+
+    json_text = proto_module::TestMessage.encode_json(m)
+    m2 = proto_module::TestMessage.decode_json(json_text)
+    puts m.inspect
+    puts m2.inspect
+    assert m == m2
+
+    # Crash case from GitHub issue 283.
+    bar = proto_module::Bar.new(msg: "bar")
+    baz1 = proto_module::Baz.new(msg: "baz")
+    baz2 = proto_module::Baz.new(msg: "quux")
+    proto_module::Foo.encode_json(proto_module::Foo.new)
+    proto_module::Foo.encode_json(proto_module::Foo.new(bar: bar))
+    proto_module::Foo.encode_json(proto_module::Foo.new(bar: bar, baz: [baz1, baz2]))
+  end
+
+  def test_json_empty
+    assert proto_module::TestMessage.encode_json(proto_module::TestMessage.new) == '{}'
+  end
+
+  def test_json_emit_defaults
+    # TODO: Fix JSON in JRuby version.
+    return if RUBY_PLATFORM == "java"
+    m = proto_module::TestMessage.new
+
+    expected = {
+      optionalInt32: 0,
+      optionalInt64: 0,
+      optionalUint32: 0,
+      optionalUint64: 0,
+      optionalBool: false,
+      optionalFloat: 0,
+      optionalDouble: 0,
+      optionalString: "",
+      optionalBytes: "",
+      optionalEnum: "Default",
+      repeatedInt32: [],
+      repeatedInt64: [],
+      repeatedUint32: [],
+      repeatedUint64: [],
+      repeatedBool: [],
+      repeatedFloat: [],
+      repeatedDouble: [],
+      repeatedString: [],
+      repeatedBytes: [],
+      repeatedMsg: [],
+      repeatedEnum: []
+    }
+
+    actual = proto_module::TestMessage.encode_json(m, :emit_defaults => true)
+
+    assert JSON.parse(actual, :symbolize_names => true) == expected
+  end
+
+  def test_json_emit_defaults_submsg
+    # TODO: Fix JSON in JRuby version.
+    return if RUBY_PLATFORM == "java"
+    m = proto_module::TestMessage.new(optional_msg: proto_module::TestMessage2.new)
+
+    expected = {
+      optionalInt32: 0,
+      optionalInt64: 0,
+      optionalUint32: 0,
+      optionalUint64: 0,
+      optionalBool: false,
+      optionalFloat: 0,
+      optionalDouble: 0,
+      optionalString: "",
+      optionalBytes: "",
+      optionalMsg: {foo: 0},
+      optionalEnum: "Default",
+      repeatedInt32: [],
+      repeatedInt64: [],
+      repeatedUint32: [],
+      repeatedUint64: [],
+      repeatedBool: [],
+      repeatedFloat: [],
+      repeatedDouble: [],
+      repeatedString: [],
+      repeatedBytes: [],
+      repeatedMsg: [],
+      repeatedEnum: []
+    }
+
+    actual = proto_module::TestMessage.encode_json(m, :emit_defaults => true)
+
+    assert JSON.parse(actual, :symbolize_names => true) == expected
+  end
+
+  def test_json_emit_defaults_repeated_submsg
+    # TODO: Fix JSON in JRuby version.
+    return if RUBY_PLATFORM == "java"
+    m = proto_module::TestMessage.new(repeated_msg: [proto_module::TestMessage2.new])
+
+    expected = {
+      optionalInt32: 0,
+      optionalInt64: 0,
+      optionalUint32: 0,
+      optionalUint64: 0,
+      optionalBool: false,
+      optionalFloat: 0,
+      optionalDouble: 0,
+      optionalString: "",
+      optionalBytes: "",
+      optionalEnum: "Default",
+      repeatedInt32: [],
+      repeatedInt64: [],
+      repeatedUint32: [],
+      repeatedUint64: [],
+      repeatedBool: [],
+      repeatedFloat: [],
+      repeatedDouble: [],
+      repeatedString: [],
+      repeatedBytes: [],
+      repeatedMsg: [{foo: 0}],
+      repeatedEnum: []
+    }
+
+    actual = proto_module::TestMessage.encode_json(m, :emit_defaults => true)
+
+    assert JSON.parse(actual, :symbolize_names => true) == expected
+  end
+
+  def test_comparison_with_arbitrary_object
+    assert proto_module::TestMessage.new != nil
+  end
+end

+ 46 - 2
ruby/tests/gc_test.rb

@@ -6,12 +6,13 @@ $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__)))
 old_gc = GC.stress
 GC.stress = 0x01 | 0x04
 require 'generated_code_pb'
+require 'generated_code_proto2_pb'
 GC.stress = old_gc
 
 require 'test/unit'
 
 class GCTest < Test::Unit::TestCase
-  def get_msg
+  def get_msg_proto3
     A::B::C::TestMessage.new(
         :optional_int32 => 1,
         :optional_int64 => 1,
@@ -46,12 +47,55 @@ class GCTest < Test::Unit::TestCase
         :map_string_bool => {"a" => true},
     )
   end
+
+  # Builds an A::B::Proto2::TestMessage with every optional_*, repeated_* and
+  # required_* field set to a simple non-default value. The oneof members and
+  # nested_message are not set. The nested TestMessage instances passed for
+  # the *_msg fields are constructed with no arguments, i.e. with none of
+  # their own fields assigned.
+  def get_msg_proto2
+    A::B::Proto2::TestMessage.new(
+        :optional_int32 => 1,
+        :optional_int64 => 1,
+        :optional_uint32 => 1,
+        :optional_uint64 => 1,
+        :optional_bool => true,
+        :optional_double => 1.0,
+        :optional_float => 1.0,
+        :optional_string => "a",
+        :optional_bytes => "b",
+        :optional_enum => A::B::Proto2::TestEnum::A,
+        :optional_msg => A::B::Proto2::TestMessage.new(),
+        :repeated_int32 => [1],
+        :repeated_int64 => [1],
+        :repeated_uint32 => [1],
+        :repeated_uint64 => [1],
+        :repeated_bool => [true],
+        :repeated_double => [1.0],
+        :repeated_float => [1.0],
+        :repeated_string => ["a"],
+        :repeated_bytes => ["b"],
+        :repeated_enum => [A::B::Proto2::TestEnum::A],
+        :repeated_msg => [A::B::Proto2::TestMessage.new()],
+        :required_int32 => 1,
+        :required_int64 => 1,
+        :required_uint32 => 1,
+        :required_uint64 => 1,
+        :required_bool => true,
+        :required_double => 1.0,
+        :required_float => 1.0,
+        :required_string => "a",
+        :required_bytes => "b",
+        :required_enum => A::B::Proto2::TestEnum::A,
+        :required_msg => A::B::Proto2::TestMessage.new(),
+    )
+  end
+
   def test_generated_msg
     old_gc = GC.stress
     GC.stress = 0x01 | 0x04
-    from = get_msg
+    from = get_msg_proto3
     data = A::B::C::TestMessage.encode(from)
     to = A::B::C::TestMessage.decode(data)
+
+    from = get_msg_proto2
+    data = A::B::Proto2::TestMessage.encode(from)
+    to = A::B::Proto2::TestMessage.decode(data)
     GC.stress = old_gc
     puts "passed"
   end

+ 80 - 0
ruby/tests/generated_code_proto2.proto

@@ -0,0 +1,80 @@
+syntax = "proto2";
+
+package a.b.proto2;
+
+message TestMessage {
+  optional int32 optional_int32 = 1;
+  optional int64 optional_int64 = 2;
+  optional uint32 optional_uint32 = 3;
+  optional uint64 optional_uint64 = 4;
+  optional bool optional_bool = 5;
+  optional double optional_double = 6;
+  optional float optional_float = 7;
+  optional string optional_string = 8;
+  optional bytes optional_bytes = 9;
+  optional TestEnum optional_enum = 10;
+  optional TestMessage optional_msg = 11;
+
+  repeated int32 repeated_int32 = 21;
+  repeated int64 repeated_int64 = 22;
+  repeated uint32 repeated_uint32 = 23;
+  repeated uint64 repeated_uint64 = 24;
+  repeated bool repeated_bool = 25;
+  repeated double repeated_double = 26;
+  repeated float repeated_float = 27;
+  repeated string repeated_string = 28;
+  repeated bytes repeated_bytes = 29;
+  repeated TestEnum repeated_enum = 30;
+  repeated TestMessage repeated_msg = 31;
+
+  required int32 required_int32 = 41;
+  required int64 required_int64 = 42;
+  required uint32 required_uint32 = 43;
+  required uint64 required_uint64 = 44;
+  required bool required_bool = 45;
+  required double required_double = 46;
+  required float required_float = 47;
+  required string required_string = 48;
+  required bytes required_bytes = 49;
+  required TestEnum required_enum = 50;
+  required TestMessage required_msg = 51;
+
+  oneof my_oneof {
+    int32 oneof_int32 = 61;
+    int64 oneof_int64 = 62;
+    uint32 oneof_uint32 = 63;
+    uint64 oneof_uint64 = 64;
+    bool oneof_bool = 65;
+    double oneof_double = 66;
+    float oneof_float = 67;
+    string oneof_string = 68;
+    bytes oneof_bytes = 69;
+    TestEnum oneof_enum = 70;
+    TestMessage oneof_msg = 71;
+  }
+
+  message NestedMessage {
+    optional int32 foo = 1;
+  }
+
+  optional NestedMessage nested_message = 80;
+
+  // Reserved for non-existing field test.
+  // int32 non_exist = 89;
+}
+
+enum TestEnum {
+  Default = 0;
+  A = 1;
+  B = 2;
+  C = 3;
+}
+
+// Self-referential message whose field numbers overlap TestMessage's:
+// 11 and 31 match TestMessage.optional_msg / repeated_msg, and 89 is the
+// number TestMessage leaves undeclared for its non-existing-field test.
+// NOTE(review): 51 is TestMessage.required_msg in this file, while
+// TestMessage's oneof message member (oneof_msg) is 71 -- confirm which
+// field this oneof member is meant to line up with.
+message TestUnknown {
+  optional TestUnknown optional_unknown = 11;
+  repeated TestUnknown repeated_unknown = 31;
+  oneof my_oneof {
+    TestUnknown oneof_unknown = 51;
+  }
+  // Same number as the commented-out `non_exist` field in TestMessage.
+  optional int32 unknown_field = 89;
+}

+ 21 - 0
ruby/tests/generated_code_proto2_test.rb

@@ -0,0 +1,21 @@
+#!/usr/bin/ruby
+
+# The generated *_proto2_pb.rb files are in the same directory as this test.
+$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__)))
+
+require 'generated_code_proto2_pb'
+require 'test_import_proto2_pb'
+require 'test_ruby_package_proto2_pb'
+require 'test/unit'
+
+class GeneratedCodeProto2Test < Test::Unit::TestCase
+  def test_generated_msg
+    # just test that we can instantiate the message. The purpose of this test
+    # is to ensure that the output of the code generator is valid Ruby and
+    # successfully creates message definitions and classes, not to test every
+    # aspect of the extension (basic.rb is for that).
+    m = A::B::Proto2::TestMessage.new()
+    m2 = FooBar::Proto2::TestImportedMessage.new()
+    m3 = A::B::Proto2::TestRubyPackageMessage.new()
+  end
+end

+ 5 - 0
ruby/tests/test_import_proto2.proto

@@ -0,0 +1,5 @@
+syntax = "proto2";
+
+package foo_bar.proto2;
+
+message TestImportedMessage {}

+ 7 - 0
ruby/tests/test_ruby_package_proto2.proto

@@ -0,0 +1,7 @@
+syntax = "proto2";
+
+package foo_bar_proto2;
+
+option ruby_package = "A.B.Proto2";
+
+message TestRubyPackageMessage {}

+ 57 - 55
src/google/protobuf/compiler/ruby/ruby_generated_code_pb.rb

@@ -4,62 +4,64 @@
 require 'google/protobuf'
 
 Google::Protobuf::DescriptorPool.generated_pool.build do
-  add_message "A.B.C.TestMessage" do
-    optional :optional_int32, :int32, 1
-    optional :optional_int64, :int64, 2
-    optional :optional_uint32, :uint32, 3
-    optional :optional_uint64, :uint64, 4
-    optional :optional_bool, :bool, 5
-    optional :optional_double, :double, 6
-    optional :optional_float, :float, 7
-    optional :optional_string, :string, 8
-    optional :optional_bytes, :bytes, 9
-    optional :optional_enum, :enum, 10, "A.B.C.TestEnum"
-    optional :optional_msg, :message, 11, "A.B.C.TestMessage"
-    repeated :repeated_int32, :int32, 21
-    repeated :repeated_int64, :int64, 22
-    repeated :repeated_uint32, :uint32, 23
-    repeated :repeated_uint64, :uint64, 24
-    repeated :repeated_bool, :bool, 25
-    repeated :repeated_double, :double, 26
-    repeated :repeated_float, :float, 27
-    repeated :repeated_string, :string, 28
-    repeated :repeated_bytes, :bytes, 29
-    repeated :repeated_enum, :enum, 30, "A.B.C.TestEnum"
-    repeated :repeated_msg, :message, 31, "A.B.C.TestMessage"
-    map :map_int32_string, :int32, :string, 61
-    map :map_int64_string, :int64, :string, 62
-    map :map_uint32_string, :uint32, :string, 63
-    map :map_uint64_string, :uint64, :string, 64
-    map :map_bool_string, :bool, :string, 65
-    map :map_string_string, :string, :string, 66
-    map :map_string_msg, :string, :message, 67, "A.B.C.TestMessage"
-    map :map_string_enum, :string, :enum, 68, "A.B.C.TestEnum"
-    map :map_string_int32, :string, :int32, 69
-    map :map_string_bool, :string, :bool, 70
-    optional :nested_message, :message, 80, "A.B.C.TestMessage.NestedMessage"
-    oneof :my_oneof do
-      optional :oneof_int32, :int32, 41
-      optional :oneof_int64, :int64, 42
-      optional :oneof_uint32, :uint32, 43
-      optional :oneof_uint64, :uint64, 44
-      optional :oneof_bool, :bool, 45
-      optional :oneof_double, :double, 46
-      optional :oneof_float, :float, 47
-      optional :oneof_string, :string, 48
-      optional :oneof_bytes, :bytes, 49
-      optional :oneof_enum, :enum, 50, "A.B.C.TestEnum"
-      optional :oneof_msg, :message, 51, "A.B.C.TestMessage"
+  add_file("ruby_generated_code.proto", :syntax => :proto3) do
+    add_message "A.B.C.TestMessage" do
+      optional :optional_int32, :int32, 1
+      optional :optional_int64, :int64, 2
+      optional :optional_uint32, :uint32, 3
+      optional :optional_uint64, :uint64, 4
+      optional :optional_bool, :bool, 5
+      optional :optional_double, :double, 6
+      optional :optional_float, :float, 7
+      optional :optional_string, :string, 8
+      optional :optional_bytes, :bytes, 9
+      optional :optional_enum, :enum, 10, "A.B.C.TestEnum"
+      optional :optional_msg, :message, 11, "A.B.C.TestMessage"
+      repeated :repeated_int32, :int32, 21
+      repeated :repeated_int64, :int64, 22
+      repeated :repeated_uint32, :uint32, 23
+      repeated :repeated_uint64, :uint64, 24
+      repeated :repeated_bool, :bool, 25
+      repeated :repeated_double, :double, 26
+      repeated :repeated_float, :float, 27
+      repeated :repeated_string, :string, 28
+      repeated :repeated_bytes, :bytes, 29
+      repeated :repeated_enum, :enum, 30, "A.B.C.TestEnum"
+      repeated :repeated_msg, :message, 31, "A.B.C.TestMessage"
+      map :map_int32_string, :int32, :string, 61
+      map :map_int64_string, :int64, :string, 62
+      map :map_uint32_string, :uint32, :string, 63
+      map :map_uint64_string, :uint64, :string, 64
+      map :map_bool_string, :bool, :string, 65
+      map :map_string_string, :string, :string, 66
+      map :map_string_msg, :string, :message, 67, "A.B.C.TestMessage"
+      map :map_string_enum, :string, :enum, 68, "A.B.C.TestEnum"
+      map :map_string_int32, :string, :int32, 69
+      map :map_string_bool, :string, :bool, 70
+      optional :nested_message, :message, 80, "A.B.C.TestMessage.NestedMessage"
+      oneof :my_oneof do
+        optional :oneof_int32, :int32, 41
+        optional :oneof_int64, :int64, 42
+        optional :oneof_uint32, :uint32, 43
+        optional :oneof_uint64, :uint64, 44
+        optional :oneof_bool, :bool, 45
+        optional :oneof_double, :double, 46
+        optional :oneof_float, :float, 47
+        optional :oneof_string, :string, 48
+        optional :oneof_bytes, :bytes, 49
+        optional :oneof_enum, :enum, 50, "A.B.C.TestEnum"
+        optional :oneof_msg, :message, 51, "A.B.C.TestMessage"
+      end
+    end
+    add_message "A.B.C.TestMessage.NestedMessage" do
+      optional :foo, :int32, 1
+    end
+    add_enum "A.B.C.TestEnum" do
+      value :Default, 0
+      value :A, 1
+      value :B, 2
+      value :C, 3
     end
-  end
-  add_message "A.B.C.TestMessage.NestedMessage" do
-    optional :foo, :int32, 1
-  end
-  add_enum "A.B.C.TestEnum" do
-    value :Default, 0
-    value :A, 1
-    value :B, 2
-    value :C, 3
   end
 end
 

+ 68 - 0
src/google/protobuf/compiler/ruby/ruby_generated_code_proto2.proto

@@ -0,0 +1,68 @@
+syntax = "proto2";
+
+package A.B.C;
+
+message TestMessage {
+  optional int32 optional_int32 = 1 [default = 1];
+  optional int64 optional_int64 = 2 [default = 2];
+  optional uint32 optional_uint32 = 3 [default = 3];
+  optional uint64 optional_uint64 = 4 [default = 4];
+  optional bool optional_bool = 5 [default = true];
+  optional double optional_double = 6 [default = 6.0];
+  optional float optional_float = 7 [default = 7.0];
+  optional string optional_string = 8 [default = "default str"];
+  optional bytes optional_bytes = 9 [default = "\0\1\2\100fubar"];
+  optional TestEnum optional_enum = 10 [default = A];
+  optional TestMessage optional_msg = 11;
+
+  repeated int32 repeated_int32 = 21;
+  repeated int64 repeated_int64 = 22;
+  repeated uint32 repeated_uint32 = 23;
+  repeated uint64 repeated_uint64 = 24;
+  repeated bool repeated_bool = 25;
+  repeated double repeated_double = 26;
+  repeated float repeated_float = 27;
+  repeated string repeated_string = 28;
+  repeated bytes repeated_bytes = 29;
+  repeated TestEnum repeated_enum = 30;
+  repeated TestMessage repeated_msg = 31;
+
+  required int32 required_int32 = 41;
+  required int64 required_int64 = 42;
+  required uint32 required_uint32 = 43;
+  required uint64 required_uint64 = 44;
+  required bool required_bool = 45;
+  required double required_double = 46;
+  required float required_float = 47;
+  required string required_string = 48;
+  required bytes required_bytes = 49;
+  required TestEnum required_enum = 50;
+  required TestMessage required_msg = 51;
+
+  oneof my_oneof {
+    int32 oneof_int32 = 61;
+    int64 oneof_int64 = 62;
+    uint32 oneof_uint32 = 63;
+    uint64 oneof_uint64 = 64;
+    bool oneof_bool = 65;
+    double oneof_double = 66;
+    float oneof_float = 67;
+    string oneof_string = 68;
+    bytes oneof_bytes = 69;
+    TestEnum oneof_enum = 70;
+    TestMessage oneof_msg = 71;
+  }
+
+  message NestedMessage {
+    optional int32 foo = 1;
+  }
+
+  optional NestedMessage nested_message = 80;
+}
+
+enum TestEnum {
+  Default = 0;
+  A = 1;
+  B = 2;
+  C = 3;
+}

+ 77 - 0
src/google/protobuf/compiler/ruby/ruby_generated_code_proto2_pb.rb

@@ -0,0 +1,77 @@
+# Generated by the protocol buffer compiler.  DO NOT EDIT!
+# source: ruby_generated_code_proto2.proto
+
+require 'google/protobuf'
+
+Google::Protobuf::DescriptorPool.generated_pool.build do
+  add_file("ruby_generated_code_proto2.proto", :syntax => :proto2) do
+    add_message "A.B.C.TestMessage" do
+      optional :optional_int32, :int32, 1, default: 1
+      optional :optional_int64, :int64, 2, default: 2
+      optional :optional_uint32, :uint32, 3, default: 3
+      optional :optional_uint64, :uint64, 4, default: 4
+      optional :optional_bool, :bool, 5, default: true
+      optional :optional_double, :double, 6, default: 6
+      optional :optional_float, :float, 7, default: 7
+      optional :optional_string, :string, 8, default: "default str"
+      optional :optional_bytes, :bytes, 9, default: "\x00\x01\x02\x40\x66\x75\x62\x61\x72".force_encoding("ASCII-8BIT")
+      optional :optional_enum, :enum, 10, "A.B.C.TestEnum", default: 1
+      optional :optional_msg, :message, 11, "A.B.C.TestMessage"
+      repeated :repeated_int32, :int32, 21
+      repeated :repeated_int64, :int64, 22
+      repeated :repeated_uint32, :uint32, 23
+      repeated :repeated_uint64, :uint64, 24
+      repeated :repeated_bool, :bool, 25
+      repeated :repeated_double, :double, 26
+      repeated :repeated_float, :float, 27
+      repeated :repeated_string, :string, 28
+      repeated :repeated_bytes, :bytes, 29
+      repeated :repeated_enum, :enum, 30, "A.B.C.TestEnum"
+      repeated :repeated_msg, :message, 31, "A.B.C.TestMessage"
+      required :required_int32, :int32, 41
+      required :required_int64, :int64, 42
+      required :required_uint32, :uint32, 43
+      required :required_uint64, :uint64, 44
+      required :required_bool, :bool, 45
+      required :required_double, :double, 46
+      required :required_float, :float, 47
+      required :required_string, :string, 48
+      required :required_bytes, :bytes, 49
+      required :required_enum, :enum, 50, "A.B.C.TestEnum"
+      required :required_msg, :message, 51, "A.B.C.TestMessage"
+      optional :nested_message, :message, 80, "A.B.C.TestMessage.NestedMessage"
+      oneof :my_oneof do
+        optional :oneof_int32, :int32, 61
+        optional :oneof_int64, :int64, 62
+        optional :oneof_uint32, :uint32, 63
+        optional :oneof_uint64, :uint64, 64
+        optional :oneof_bool, :bool, 65
+        optional :oneof_double, :double, 66
+        optional :oneof_float, :float, 67
+        optional :oneof_string, :string, 68
+        optional :oneof_bytes, :bytes, 69
+        optional :oneof_enum, :enum, 70, "A.B.C.TestEnum"
+        optional :oneof_msg, :message, 71, "A.B.C.TestMessage"
+      end
+    end
+    add_message "A.B.C.TestMessage.NestedMessage" do
+      optional :foo, :int32, 1
+    end
+    add_enum "A.B.C.TestEnum" do
+      value :Default, 0
+      value :A, 1
+      value :B, 2
+      value :C, 3
+    end
+  end
+end
+
+module A
+  module B
+    module C
+      TestMessage = Google::Protobuf::DescriptorPool.generated_pool.lookup("A.B.C.TestMessage").msgclass
+      TestMessage::NestedMessage = Google::Protobuf::DescriptorPool.generated_pool.lookup("A.B.C.TestMessage.NestedMessage").msgclass
+      TestEnum = Google::Protobuf::DescriptorPool.generated_pool.lookup("A.B.C.TestEnum").enummodule
+    end
+  end
+end

+ 114 - 25
src/google/protobuf/compiler/ruby/ruby_generator.cc

@@ -28,6 +28,7 @@
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+#include <iomanip>
 #include <sstream>
 
 #include <google/protobuf/compiler/code_generator.h>
@@ -45,12 +46,13 @@ namespace compiler {
 namespace ruby {
 
 // Forward decls.
-std::string IntToString(int32 value);
+template<class numeric_type> std::string NumberToString(numeric_type value);
 std::string GetRequireName(const std::string& proto_file);
 std::string LabelForField(google::protobuf::FieldDescriptor* field);
 std::string TypeName(google::protobuf::FieldDescriptor* field);
-void GenerateMessage(const google::protobuf::Descriptor* message,
-                     google::protobuf::io::Printer* printer);
+bool GenerateMessage(const google::protobuf::Descriptor* message,
+                     google::protobuf::io::Printer* printer,
+		     std::string* error);
 void GenerateEnum(const google::protobuf::EnumDescriptor* en,
                   google::protobuf::io::Printer* printer);
 void GenerateMessageAssignment(
@@ -61,8 +63,11 @@ void GenerateEnumAssignment(
     const std::string& prefix,
     const google::protobuf::EnumDescriptor* en,
     google::protobuf::io::Printer* printer);
-
-std::string IntToString(int32 value) {
+std::string DefaultValueForField(
+    const google::protobuf::FieldDescriptor* field);
+  
+template<class numeric_type>
+std::string NumberToString(numeric_type value) {
   std::ostringstream os;
   os << value;
   return os.str();
@@ -110,6 +115,62 @@ std::string TypeName(const google::protobuf::FieldDescriptor* field) {
   }
 }
 
+// Maps a file's syntax enum to the token emitted after ":syntax => :" in the
+// generated add_file call. Any value other than proto2/proto3 is logged as
+// fatal; the trailing return only exists to satisfy the return type.
+string StringifySyntax(FileDescriptor::Syntax syntax) {
+  if (syntax == FileDescriptor::SYNTAX_PROTO2) {
+    return "proto2";
+  }
+  if (syntax == FileDescriptor::SYNTAX_PROTO3) {
+    return "proto3";
+  }
+
+  // SYNTAX_UNKNOWN and every other value end up here.
+  GOOGLE_LOG(FATAL) << "Unsupported syntax; this generator only supports "
+      "proto2 and proto3 syntax.";
+  return "";
+}
+
+// Renders the explicit default value of `field` as a Ruby expression for the
+// "default: ..." argument of the generated DSL.
+//
+// Numeric and enum defaults are printed as numbers (enums by value number),
+// bools as true/false. Bytes defaults are written as a double-quoted literal
+// with every byte in \xNN form, tagged ASCII-8BIT. String defaults are written
+// as a double-quoted literal in which `"`, `\` and `#` are backslash-escaped
+// and ASCII control characters use \xNN, so that the default cannot terminate
+// the literal early or trigger Ruby string interpolation.
+//
+// NOTE(review): float/double defaults go through the stream's default
+// formatting; precision of long fractions and non-finite values (inf/nan)
+// are not specially handled here -- confirm whether that matters.
+std::string DefaultValueForField(const google::protobuf::FieldDescriptor* field) {
+  switch(field->cpp_type()) {
+    case FieldDescriptor::CPPTYPE_INT32:
+      return NumberToString(field->default_value_int32());
+    case FieldDescriptor::CPPTYPE_INT64:
+      return NumberToString(field->default_value_int64());
+    case FieldDescriptor::CPPTYPE_UINT32:
+      return NumberToString(field->default_value_uint32());
+    case FieldDescriptor::CPPTYPE_UINT64:
+      return NumberToString(field->default_value_uint64());
+    case FieldDescriptor::CPPTYPE_FLOAT:
+      return NumberToString(field->default_value_float());
+    case FieldDescriptor::CPPTYPE_DOUBLE:
+      return NumberToString(field->default_value_double());
+    case FieldDescriptor::CPPTYPE_BOOL:
+      return field->default_value_bool() ? "true" : "false";
+    case FieldDescriptor::CPPTYPE_ENUM:
+      return NumberToString(field->default_value_enum()->number());
+    case FieldDescriptor::CPPTYPE_STRING: {
+      std::ostringstream os;
+      const string& default_str = field->default_value_string();
+      const bool is_bytes = field->type() == FieldDescriptor::TYPE_BYTES;
+
+      os << "\"";
+      // Pad single-digit hex values to two characters (\x0a, not \xa).
+      os.fill('0');
+      for (size_t i = 0; i < default_str.length(); ++i) {
+        const unsigned char c = static_cast<unsigned char>(default_str[i]);
+        if (is_bytes || c < 0x20 || c == 0x7f) {
+          // Write the hex form of the byte. setw only affects this one value.
+          os << "\\x" << std::hex << std::setw(2)
+             << static_cast<unsigned int>(c);
+        } else if (c == '"' || c == '\\' || c == '#') {
+          // These characters are significant inside a Ruby double-quoted
+          // literal (terminator, escape introducer, interpolation).
+          os << '\\' << default_str[i];
+        } else {
+          os << default_str[i];
+        }
+      }
+      os << "\"";
+      if (is_bytes) {
+        os << ".force_encoding(\"ASCII-8BIT\")";
+      }
+
+      return os.str();
+    }
+    default: assert(false); return "";
+  }
+}
+
 void GenerateField(const google::protobuf::FieldDescriptor* field,
                    google::protobuf::io::Printer* printer) {
 
@@ -124,7 +185,7 @@ void GenerateField(const google::protobuf::FieldDescriptor* field,
       "name", field->name(),
       "key_type", TypeName(key_field),
       "value_type", TypeName(value_field),
-      "number", IntToString(field->number()));
+      "number", NumberToString(field->number()));
 
     if (value_field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
       printer->Print(
@@ -146,19 +207,25 @@ void GenerateField(const google::protobuf::FieldDescriptor* field,
     printer->Print(
       ":$type$, $number$",
       "type", TypeName(field),
-      "number", IntToString(field->number()));
+      "number", NumberToString(field->number()));
 
     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
       printer->Print(
-        ", \"$subtype$\"\n",
+        ", \"$subtype$\"",
        "subtype", field->message_type()->full_name());
     } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_ENUM) {
       printer->Print(
-        ", \"$subtype$\"\n",
+        ", \"$subtype$\"",
         "subtype", field->enum_type()->full_name());
-    } else {
-      printer->Print("\n");
     }
+
+    if (field->has_default_value()) {
+      printer->Print(
+	", default: $default$",
+	"default", DefaultValueForField(field));
+    }
+    
+    printer->Print("\n");
   }
 }
 
@@ -178,13 +245,18 @@ void GenerateOneof(const google::protobuf::OneofDescriptor* oneof,
   printer->Print("end\n");
 }
 
-void GenerateMessage(const google::protobuf::Descriptor* message,
-                     google::protobuf::io::Printer* printer) {
-
+bool GenerateMessage(const google::protobuf::Descriptor* message,
+                     google::protobuf::io::Printer* printer,
+		     std::string* error) {
+  if (message->extension_range_count() > 0 || message->extension_count() > 0) {
+    *error = "Extensions are not yet supported for proto2 .proto files.";
+    return false;
+  }
+  
   // Don't generate MapEntry messages -- we use the Ruby extension's native
   // support for map fields instead.
   if (message->options().map_entry()) {
-    return;
+    return true;
   }
 
   printer->Print(
@@ -208,11 +280,15 @@ void GenerateMessage(const google::protobuf::Descriptor* message,
   printer->Print("end\n");
 
   for (int i = 0; i < message->nested_type_count(); i++) {
-    GenerateMessage(message->nested_type(i), printer);
+    if (!GenerateMessage(message->nested_type(i), printer, error)) {
+      return false;
+    }
   }
   for (int i = 0; i < message->enum_type_count(); i++) {
     GenerateEnum(message->enum_type(i), printer);
   }
+
+  return true;
 }
 
 void GenerateEnum(const google::protobuf::EnumDescriptor* en,
@@ -227,7 +303,7 @@ void GenerateEnum(const google::protobuf::EnumDescriptor* en,
     printer->Print(
       "value :$name$, $number$\n",
       "name", value->name(),
-      "number", IntToString(value->number()));
+      "number", NumberToString(value->number()));
   }
 
   printer->Outdent();
@@ -423,7 +499,8 @@ bool MaybeEmitDependency(const FileDescriptor* import,
                          const FileDescriptor* from,
                          io::Printer* printer,
                          string* error) {
-  if (import->syntax() == FileDescriptor::SYNTAX_PROTO2) {
+  if (from->syntax() == FileDescriptor::SYNTAX_PROTO3 &&
+      import->syntax() == FileDescriptor::SYNTAX_PROTO2) {
     for (int i = 0; i < from->message_type_count(); i++) {
       if (UsesTypeFromFile(from->message_type(i), import, error)) {
         // Error text was already set by UsesTypeFromFile().
@@ -462,16 +539,29 @@ bool GenerateFile(const FileDescriptor* file, io::Printer* printer,
     }
   }
 
-  printer->Print(
-    "Google::Protobuf::DescriptorPool.generated_pool.build do\n");
+  // TODO: Remove this when ruby supports extensions for proto2 syntax.
+  if (file->extension_count() > 0) {
+    *error = "Extensions are not yet supported for proto2 .proto files.";
+    return false;
+  }
+
+  printer->Print("Google::Protobuf::DescriptorPool.generated_pool.build do\n");
+  printer->Indent();
+  printer->Print("add_file(\"$filename$\", :syntax => :$syntax$) do\n",
+		 "filename", file->name(), "syntax",
+		 StringifySyntax(file->syntax()));
   printer->Indent();
   for (int i = 0; i < file->message_type_count(); i++) {
-    GenerateMessage(file->message_type(i), printer);
+    if (!GenerateMessage(file->message_type(i), printer, error)) {
+      return false;
+    }
   }
   for (int i = 0; i < file->enum_type_count(); i++) {
     GenerateEnum(file->enum_type(i), printer);
   }
   printer->Outdent();
+  printer->Print("end\n");
+  printer->Outdent();
   printer->Print(
     "end\n\n");
 
@@ -492,10 +582,9 @@ bool Generator::Generate(
     GeneratorContext* generator_context,
     string* error) const {
 
-  if (file->syntax() != FileDescriptor::SYNTAX_PROTO3) {
-    *error =
-        "Can only generate Ruby code for proto3 .proto files.\n"
-        "Please add 'syntax = \"proto3\";' to the top of your .proto file.\n";
+  if (file->syntax() != FileDescriptor::SYNTAX_PROTO3 &&
+      file->syntax() != FileDescriptor::SYNTAX_PROTO2) {
+    *error = "Invalid or unsupported proto syntax";
     return false;
   }
 

+ 47 - 1
src/google/protobuf/compiler/ruby/ruby_generator_unittest.cc

@@ -56,7 +56,7 @@ string FindRubyTestDir() {
 // Some day, we may integrate build systems between protoc and the language
 // extensions to the point where we can do this test in a more automated way.
 
-TEST(RubyGeneratorTest, GeneratorTest) {
+TEST(RubyGeneratorTest, Proto3GeneratorTest) {
   string ruby_tests = FindRubyTestDir();
 
   google::protobuf::compiler::CommandLineInterface cli;
@@ -102,6 +102,52 @@ TEST(RubyGeneratorTest, GeneratorTest) {
   EXPECT_EQ(expected_output, output);
 }
 
+// Runs the Ruby generator over ruby_generated_code_proto2.proto and checks
+// that the produced file is byte-for-byte equal to the checked-in
+// ruby_generated_code_proto2_pb.rb.
+TEST(RubyGeneratorTest, Proto2GeneratorTest) {
+  string ruby_tests = FindRubyTestDir();
+
+  google::protobuf::compiler::CommandLineInterface cli;
+  cli.SetInputsAreProtoPathRelative(true);
+
+  ruby::Generator ruby_generator;
+  cli.RegisterGenerator("--ruby_out", &ruby_generator, "");
+
+  // Copy ruby_generated_code_proto2.proto to the temporary test directory.
+  string test_input;
+  GOOGLE_CHECK_OK(File::GetContents(
+      ruby_tests + "/ruby_generated_code_proto2.proto",
+      &test_input,
+      true));
+  GOOGLE_CHECK_OK(File::SetContents(
+      TestTempDir() + "/ruby_generated_code_proto2.proto",
+      test_input,
+      true));
+
+  // Invoke the proto compiler (we will be inside TestTempDir() at this point).
+  string ruby_out = "--ruby_out=" + TestTempDir();
+  string proto_path = "--proto_path=" + TestTempDir();
+  const char* argv[] = {
+    "protoc",
+    ruby_out.c_str(),
+    proto_path.c_str(),
+    "ruby_generated_code_proto2.proto",
+  };
+  // Derive the argument count from the array so the two cannot drift apart.
+  const int argc = static_cast<int>(sizeof(argv) / sizeof(argv[0]));
+
+  EXPECT_EQ(0, cli.Run(argc, argv));
+
+  // Load the generated output and compare to the expected result.
+  string output;
+  GOOGLE_CHECK_OK(File::GetContents(
+      TestTempDir() + "/ruby_generated_code_proto2_pb.rb",
+      &output,
+      true));
+  string expected_output;
+  GOOGLE_CHECK_OK(File::GetContents(
+      ruby_tests + "/ruby_generated_code_proto2_pb.rb",
+      &expected_output,
+      true));
+  EXPECT_EQ(expected_output, output);
+}
+
 }  // namespace
 }  // namespace ruby
 }  // namespace compiler

Some files were not shown because too many files changed in this diff