Browse Source

Merge pull request #6502 from haberman/rubygcfix

Optimized away the creation of empty string objects.
Joshua Haberman 6 years ago
parent
commit
402c28a321

+ 31 - 0
ruby/ext/google/protobuf_c/protobuf.c

@@ -51,6 +51,32 @@ VALUE get_def_obj(const void* def) {
   return rb_hash_aref(upb_def_to_ruby_obj_map, ULL2NUM((intptr_t)def));
   return rb_hash_aref(upb_def_to_ruby_obj_map, ULL2NUM((intptr_t)def));
 }
 }
 
 
+static VALUE cached_empty_string = Qnil;
+static VALUE cached_empty_bytes = Qnil;
+
+static VALUE create_frozen_string(const char* str, size_t size, bool binary) {
+  VALUE str_rb = rb_str_new(str, size);
+
+  rb_enc_associate(str_rb,
+                   binary ? kRubyString8bitEncoding : kRubyStringUtf8Encoding);
+  rb_obj_freeze(str_rb);
+  return str_rb;
+}
+
+VALUE get_frozen_string(const char* str, size_t size, bool binary) {
+  if (size == 0) {
+    return binary ? cached_empty_bytes : cached_empty_string;
+  } else {
+    // It is harder to memoize non-empty strings.  The obvious approach would be
+    // to use a Ruby hash keyed by string as memo table, but looking up in such a table
+    // requires constructing a string (the very thing we're trying to avoid).
+    //
+    // Since few fields have defaults, we will just optimize the empty string
+    // case for now.
+    return create_frozen_string(str, size, binary);
+  }
+}
+
 // -----------------------------------------------------------------------------
 // -----------------------------------------------------------------------------
 // Utilities.
 // Utilities.
 // -----------------------------------------------------------------------------
 // -----------------------------------------------------------------------------
@@ -118,4 +144,9 @@ void Init_protobuf_c() {
 
 
   rb_gc_register_address(&upb_def_to_ruby_obj_map);
   rb_gc_register_address(&upb_def_to_ruby_obj_map);
   upb_def_to_ruby_obj_map = rb_hash_new();
   upb_def_to_ruby_obj_map = rb_hash_new();
+
+  rb_gc_register_address(&cached_empty_string);
+  rb_gc_register_address(&cached_empty_bytes);
+  cached_empty_string = create_frozen_string("", 0, false);
+  cached_empty_bytes = create_frozen_string("", 0, true);
 }
 }

+ 5 - 0
ruby/ext/google/protobuf_c/protobuf.h

@@ -591,6 +591,11 @@ const upb_pbdecodermethod *new_fillmsg_decodermethod(
 // cycles.
 // cycles.
 #define ENCODE_MAX_NESTING 63
 #define ENCODE_MAX_NESTING 63
 
 
+// -----------------------------------------------------------------------------
+// A cache of frozen string objects to use as field defaults.
+// -----------------------------------------------------------------------------
+VALUE get_frozen_string(const char* data, size_t size, bool binary);
+
 // -----------------------------------------------------------------------------
 // -----------------------------------------------------------------------------
 // Global map from upb {msg,enum}defs to wrapper Descriptor/EnumDescriptor
 // Global map from upb {msg,enum}defs to wrapper Descriptor/EnumDescriptor
 // instances.
 // instances.

+ 13 - 15
ruby/ext/google/protobuf_c/storage.c

@@ -96,17 +96,19 @@ VALUE native_slot_encode_and_freeze_string(upb_fieldtype_t type, VALUE value) {
       kRubyStringUtf8Encoding : kRubyString8bitEncoding;
       kRubyStringUtf8Encoding : kRubyString8bitEncoding;
   VALUE desired_encoding_value = rb_enc_from_encoding(desired_encoding);
   VALUE desired_encoding_value = rb_enc_from_encoding(desired_encoding);
 
 
-  // Note: this will not duplicate underlying string data unless necessary.
-  value = rb_str_encode(value, desired_encoding_value, 0, Qnil);
+  if (rb_obj_encoding(value) != desired_encoding_value || !OBJ_FROZEN(value)) {
+    // Note: this will not duplicate underlying string data unless necessary.
+    value = rb_str_encode(value, desired_encoding_value, 0, Qnil);
 
 
-  if (type == UPB_TYPE_STRING &&
-      rb_enc_str_coderange(value) == ENC_CODERANGE_BROKEN) {
-    rb_raise(rb_eEncodingError, "String is invalid UTF-8");
-  }
+    if (type == UPB_TYPE_STRING &&
+        rb_enc_str_coderange(value) == ENC_CODERANGE_BROKEN) {
+      rb_raise(rb_eEncodingError, "String is invalid UTF-8");
+    }
 
 
-  // Ensure the data remains valid.  Since we called #encode a moment ago,
-  // this does not freeze the string the user assigned.
-  rb_obj_freeze(value);
+    // Ensure the data remains valid.  Since we called #encode a moment ago,
+    // this does not freeze the string the user assigned.
+    rb_obj_freeze(value);
+  }
 
 
   return value;
   return value;
 }
 }
@@ -729,12 +731,8 @@ VALUE layout_get_default(const upb_fielddef *field) {
     case UPB_TYPE_BYTES: {
     case UPB_TYPE_BYTES: {
       size_t size;
       size_t size;
       const char *str = upb_fielddef_defaultstr(field, &size);
       const char *str = upb_fielddef_defaultstr(field, &size);
-      VALUE str_rb = rb_str_new(str, size);
-
-      rb_enc_associate(str_rb, (upb_fielddef_type(field) == UPB_TYPE_BYTES) ?
-                 kRubyString8bitEncoding : kRubyStringUtf8Encoding);
-      rb_obj_freeze(str_rb);
-      return str_rb;
+      return get_frozen_string(str, size,
+                               upb_fielddef_type(field) == UPB_TYPE_BYTES);
     }
     }
     default: return Qnil;
     default: return Qnil;
   }
   }