Browse Source

Ruby <2.7now uses WeakMap too, which prevents memory leaks.

Ruby <2.7 does not allow non-finalizable objects to be WeakMap
keys: https://bugs.ruby-lang.org/issues/16035

We work around this by using a secondary map for Ruby <2.7 which
maps the non-finalizable integer to a distinct object.

For now we accept that the entries in the secondary map wil never
be collected.  If this becomes a problem we can perform a GC pass
every so often that looks at the contents of the object cache to
decide what can be deleted from the secondary map.
Joshua Haberman 4 years ago
parent
commit
9879f423ff

+ 1 - 1
ruby/ext/google/protobuf_c/defs.c

@@ -295,7 +295,7 @@ static VALUE DescriptorPool_alloc(VALUE klass) {
 
   self->def_to_descriptor = rb_hash_new();
   self->symtab = upb_symtab_new();
-  ObjectCache_Add(self->symtab, ret, _upb_symtab_arena(self->symtab));
+  ObjectCache_Add(self->symtab, ret);
 
   return ret;
 }

+ 6 - 5
ruby/ext/google/protobuf_c/map.c

@@ -93,7 +93,7 @@ VALUE Map_GetRubyWrapper(upb_map* map, upb_fieldtype_t key_type,
   if (val == Qnil) {
     val = Map_alloc(cMap);
     Map* self;
-    ObjectCache_Add(map, val, Arena_get(arena));
+    ObjectCache_Add(map, val);
     TypedData_Get_Struct(val, Map, &Map_type, self);
     self->map = map;
     self->arena = arena;
@@ -318,7 +318,7 @@ static VALUE Map_init(int argc, VALUE* argv, VALUE _self) {
 
   self->map = upb_map_new(Arena_get(self->arena), self->key_type,
                           self->value_type_info.type);
-  ObjectCache_Add(self->map, _self, Arena_get(self->arena));
+  ObjectCache_Add(self->map, _self);
 
   if (init_arg != Qnil) {
     Map_merge_into_self(_self, init_arg);
@@ -590,9 +590,10 @@ VALUE Map_eq(VALUE _self, VALUE _other) {
  */
 static VALUE Map_freeze(VALUE _self) {
   Map* self = ruby_to_Map(_self);
-
-  ObjectCache_Pin(self->map, _self, Arena_get(self->arena));
-  RB_OBJ_FREEZE(_self);
+  if (!RB_OBJ_FROZEN(_self)) {
+    Arena_Pin(self->arena, _self);
+    RB_OBJ_FREEZE(_self);
+  }
   return _self;
 }
 

+ 5 - 3
ruby/ext/google/protobuf_c/message.c

@@ -105,7 +105,7 @@ void Message_InitPtr(VALUE self_, upb_msg *msg, VALUE arena) {
   Message* self = ruby_to_Message(self_);
   self->msg = msg;
   self->arena = arena;
-  ObjectCache_Add(msg, self_, Arena_get(arena));
+  ObjectCache_Add(msg, self_);
 }
 
 VALUE Message_GetArena(VALUE msg_rb) {
@@ -855,8 +855,10 @@ static VALUE Message_to_h(VALUE _self) {
  */
 static VALUE Message_freeze(VALUE _self) {
   Message* self = ruby_to_Message(_self);
-  ObjectCache_Pin(self->msg, _self, Arena_get(self->arena));
-  RB_OBJ_FREEZE(_self);
+  if (!RB_OBJ_FROZEN(_self)) {
+    Arena_Pin(self->arena, _self);
+    RB_OBJ_FREEZE(_self);
+  }
   return _self;
 }
 

+ 77 - 95
ruby/ext/google/protobuf_c/protobuf.c

@@ -167,30 +167,55 @@ void StringBuilder_PrintMsgval(StringBuilder* b, upb_msgval val,
 // Arena
 // -----------------------------------------------------------------------------
 
-void Arena_free(void* data) { upb_arena_free(data); }
+typedef struct {
+  upb_arena *arena;
+  VALUE pinned_objs;
+} Arena;
+
+static void Arena_mark(void *data) {
+  Arena *arena = data;
+  rb_gc_mark(arena->pinned_objs);
+}
+
+static void Arena_free(void *data) {
+  Arena *arena = data;
+  upb_arena_free(arena->arena);
+}
 
 static VALUE cArena;
 
 const rb_data_type_t Arena_type = {
   "Google::Protobuf::Internal::Arena",
-  { NULL, Arena_free, NULL },
+  { Arena_mark, Arena_free, NULL },
+  .flags = RUBY_TYPED_FREE_IMMEDIATELY,
 };
 
 static VALUE Arena_alloc(VALUE klass) {
-  upb_arena *arena = upb_arena_new();
+  Arena *arena = ALLOC(Arena);
+  arena->arena = upb_arena_new();
+  arena->pinned_objs = Qnil;
   return TypedData_Wrap_Struct(klass, &Arena_type, arena);
 }
 
 upb_arena *Arena_get(VALUE _arena) {
-  upb_arena *arena;
-  TypedData_Get_Struct(_arena, upb_arena, &Arena_type, arena);
-  return arena;
+  Arena *arena;
+  TypedData_Get_Struct(_arena, Arena, &Arena_type, arena);
+  return arena->arena;
 }
 
 VALUE Arena_new() {
   return Arena_alloc(cArena);
 }
 
+void Arena_Pin(VALUE _arena, VALUE obj) {
+  Arena *arena;
+  TypedData_Get_Struct(_arena, Arena, &Arena_type, arena);
+  if (arena->pinned_objs == Qnil) {
+    arena->pinned_objs = rb_ary_new();
+  }
+  rb_ary_push(arena->pinned_objs, obj);
+}
+
 void Arena_register(VALUE module) {
   VALUE internal = rb_define_module_under(module, "Internal");
   VALUE klass = rb_define_class_under(internal, "Arena", rb_cObject);
@@ -209,122 +234,79 @@ void Arena_register(VALUE module) {
 // different wrapper objects for the same C object, which saves memory and
 // preserves object identity.
 //
-// We use Hash and/or WeakMap for the cache. WeakMap is faster overall
-// (probably due to removal being integrated with GC) but doesn't work for Ruby
-// <2.7 (see note below). We need Hash for Ruby <2.7 and for cases where we
-// need to GC-root the object (notably when the object has been frozen).
+// We use WeakMap for the cache. For Ruby <2.7 we also need a secondary Hash
+// to store WeakMap keys because Ruby <2.7 WeakMap doesn't allow non-finalizable
+// keys.
 
 #if RUBY_API_VERSION_CODE >= 20700
-#define USE_WEAK_MAP 1
+#define USE_SECONDARY_MAP 0
 #else
-#define USE_WEAK_MAP 0
+#define USE_SECONDARY_MAP 1
 #endif
 
-static VALUE ObjectCache_GetKey(const void* key) {
-  char buf[sizeof(key)];
-  memcpy(&buf, &key, sizeof(key));
-  intptr_t key_int = (intptr_t)key;
-  PBRUBY_ASSERT((key_int & 3) == 0);
-  return LL2NUM(key_int >> 2);
-}
+#if USE_SECONDARY_MAP
 
-// Strong object cache, uses regular Hash and GC-roots objects.
-// - For Ruby <2.7, used for all objects.
-// - For Ruby >=2.7, used only for frozen objects, so we preserve the "frozen"
-//   bit (since this information is not preserved at the upb level).
+// Maps Numeric -> Object. The object is then used as a key into the WeakMap.
+// This is needed for Ruby <2.7 where a number cannot be a key to WeakMap.
+// The object is used only for its identity; it does not contain any data.
+VALUE secondary_map = Qnil;
 
-VALUE strong_obj_cache = Qnil;
-
-static void StrongObjectCache_Init() {
-  rb_gc_register_address(&strong_obj_cache);
-  strong_obj_cache = rb_hash_new();
+static void SecondaryMap_Init() {
+  rb_gc_register_address(&secondary_map);
+  secondary_map = rb_hash_new();
 }
 
-static void StrongObjectCache_Remove(void* key) {
-  VALUE key_rb = ObjectCache_GetKey(key);
-  PBRUBY_ASSERT(rb_hash_lookup(strong_obj_cache, key_rb) != Qnil);
-  rb_hash_delete(strong_obj_cache, key_rb);
+static VALUE SecondaryMap_Get(VALUE key) {
+  VALUE ret = rb_hash_lookup(secondary_map, key);
+  if (ret == Qnil) {
+    ret = rb_eval_string("Object.new");
+    rb_hash_aset(secondary_map, key, ret);
+  }
+  return ret;
 }
 
-static VALUE StrongObjectCache_Get(const void* key) {
-  VALUE key_rb = ObjectCache_GetKey(key);
-  return rb_hash_lookup(strong_obj_cache, key_rb);
-}
+#endif
 
-static void StrongObjectCache_Add(const void* key, VALUE val,
-                                  upb_arena* arena) {
-  PBRUBY_ASSERT(StrongObjectCache_Get(key) == Qnil);
-  VALUE key_rb = ObjectCache_GetKey(key);
-  rb_hash_aset(strong_obj_cache, key_rb, val);
-  upb_arena_addcleanup(arena, (void*)key, StrongObjectCache_Remove);
+static VALUE ObjectCache_GetKey(const void* key) {
+  char buf[sizeof(key)];
+  memcpy(&buf, &key, sizeof(key));
+  intptr_t key_int = (intptr_t)key;
+  PBRUBY_ASSERT((key_int & 3) == 0);
+  VALUE ret = LL2NUM(key_int >> 2);
+#if USE_SECONDARY_MAP
+  ret = SecondaryMap_Get(ret);
+#endif
+  return ret;
 }
 
-// Weak object cache. This speeds up the test suite significantly, so we
-// presume it speeds up real code also. However we can only use it in Ruby
-// >=2.7 due to:
-//   https://bugs.ruby-lang.org/issues/16035
-
-#if USE_WEAK_MAP
+// Public ObjectCache API.
 
 VALUE weak_obj_cache = Qnil;
+ID item_get;
+ID item_set;
 
-static void WeakObjectCache_Init() {
+static void ObjectCache_Init() {
   rb_gc_register_address(&weak_obj_cache);
   VALUE klass = rb_eval_string("ObjectSpace::WeakMap");
   weak_obj_cache = rb_class_new_instance(0, NULL, klass);
-}
-
-static VALUE WeakObjectCache_Get(const void* key) {
-  VALUE key_rb = ObjectCache_GetKey(key);
-  VALUE ret = rb_funcall(weak_obj_cache, rb_intern("[]"), 1, key_rb);
-  return ret;
-}
-
-static void WeakObjectCache_Add(const void* key, VALUE val) {
-  PBRUBY_ASSERT(WeakObjectCache_Get(key) == Qnil);
-  VALUE key_rb = ObjectCache_GetKey(key);
-  rb_funcall(weak_obj_cache, rb_intern("[]="), 2, key_rb, val);
-  PBRUBY_ASSERT(WeakObjectCache_Get(key) == val);
-}
-
-#endif
-
-// Public ObjectCache API.
-
-static void ObjectCache_Init() {
-  StrongObjectCache_Init();
-#if USE_WEAK_MAP
-  WeakObjectCache_Init();
+  item_get = rb_intern("[]");
+  item_set = rb_intern("[]=");
+#if USE_SECONDARY_MAP
+  SecondaryMap_Init();
 #endif
 }
 
-void ObjectCache_Add(const void* key, VALUE val, upb_arena *arena) {
-#if USE_WEAK_MAP
-  (void)arena;
-  WeakObjectCache_Add(key, val);
-#else
-  StrongObjectCache_Add(key, val, arena);
-#endif
+void ObjectCache_Add(const void* key, VALUE val) {
+  PBRUBY_ASSERT(ObjectCache_Get(key) == Qnil);
+  VALUE key_rb = ObjectCache_GetKey(key);
+  rb_funcall(weak_obj_cache, item_set, 2, key_rb, val);
+  PBRUBY_ASSERT(ObjectCache_Get(key) == val);
 }
 
 // Returns the cached object for this key, if any. Otherwise returns Qnil.
 VALUE ObjectCache_Get(const void* key) {
-#if USE_WEAK_MAP
-  return WeakObjectCache_Get(key);
-#else
-  return StrongObjectCache_Get(key);
-#endif
-}
-
-void ObjectCache_Pin(const void* key, VALUE val, upb_arena *arena) {
-#if USE_WEAK_MAP
-  PBRUBY_ASSERT(WeakObjectCache_Get(key) == val);
-  // This will GC-root the object, but we'll still use the weak map for
-  // actual lookup.
-  StrongObjectCache_Add(key, val, arena);
-#else
-  // Value is already pinned, nothing to do.
-#endif
+  VALUE key_rb = ObjectCache_GetKey(key);
+  return rb_funcall(weak_obj_cache, item_get, 1, key_rb);
 }
 
 /*

+ 8 - 9
ruby/ext/google/protobuf_c/protobuf.h

@@ -55,6 +55,13 @@ const upb_fielddef* map_field_value(const upb_fielddef* field);
 VALUE Arena_new();
 upb_arena *Arena_get(VALUE arena);
 
+// Pins this Ruby object to the lifetime of this arena, so that as long as the
+// arena is alive this object will not be collected.
+//
+// We use this to guarantee that the "frozen" bit on the object will be
+// remembered, even if the user drops their reference to this precise object.
+void Arena_Pin(VALUE arena, VALUE obj);
+
 // -----------------------------------------------------------------------------
 // ObjectCache
 // -----------------------------------------------------------------------------
@@ -68,19 +75,11 @@ upb_arena *Arena_get(VALUE arena);
 // Adds an entry to the cache. The "arena" parameter must give the arena that
 // "key" was allocated from.  In Ruby <2.7.0, it will be used to remove the key
 // from the cache when the arena is destroyed.
-void ObjectCache_Add(const void* key, VALUE val, upb_arena *arena);
+void ObjectCache_Add(const void* key, VALUE val);
 
 // Returns the cached object for this key, if any. Otherwise returns Qnil.
 VALUE ObjectCache_Get(const void* key);
 
-// Pins the previously added object so it is GC-rooted. This turns the
-// reference to "val" from weak to strong.  We use this to guarantee that the
-// "frozen" bit on the object will be remembered, even if the user drops their
-// reference to this precise object.
-//
-// The "arena" parameter must give the arena that "key" was allocated from.
-void ObjectCache_Pin(const void* key, VALUE val, upb_arena *arena);
-
 // -----------------------------------------------------------------------------
 // StringBuilder, for inspect
 // -----------------------------------------------------------------------------

+ 6 - 5
ruby/ext/google/protobuf_c/repeated_field.c

@@ -88,7 +88,7 @@ VALUE RepeatedField_GetRubyWrapper(upb_array* array, TypeInfo type_info,
   if (val == Qnil) {
     val = RepeatedField_alloc(cRepeatedField);
     RepeatedField* self;
-    ObjectCache_Add(array, val, Arena_get(arena));
+    ObjectCache_Add(array, val);
     TypedData_Get_Struct(val, RepeatedField, &RepeatedField_type, self);
     self->array = array;
     self->arena = arena;
@@ -500,9 +500,10 @@ VALUE RepeatedField_eq(VALUE _self, VALUE _other) {
  */
 static VALUE RepeatedField_freeze(VALUE _self) {
   RepeatedField* self = ruby_to_RepeatedField(_self);
-
-  ObjectCache_Pin(self->array, _self, Arena_get(self->arena));
-  RB_OBJ_FREEZE(_self);
+  if (!RB_OBJ_FROZEN(_self)) {
+    Arena_Pin(self->arena, _self);
+    RB_OBJ_FREEZE(_self);
+  }
   return _self;
 }
 
@@ -610,7 +611,7 @@ VALUE RepeatedField_init(int argc, VALUE* argv, VALUE _self) {
 
   self->type_info = TypeInfo_FromClass(argc, argv, 0, &self->type_class, &ary);
   self->array = upb_array_new(arena, self->type_info.type);
-  ObjectCache_Add(self->array, _self, arena);
+  ObjectCache_Add(self->array, _self);
 
   if (ary != Qnil) {
     if (!RB_TYPE_P(ary, T_ARRAY)) {