Просмотр исходного кода

Merge pull request #189 from cfallin/update-ruby-upb

Updated to latest upb and added test for JSON map operation.
Joshua Haberman 10 лет назад
Родитель
Сommit
20490e33ca
4 измененных файлов с 565 добавлено и 111 удалено
  1. 5 7
      ruby/README.md
  2. 526 103
      ruby/ext/google/protobuf_c/upb.c
  3. 26 1
      ruby/ext/google/protobuf_c/upb.h
  4. 8 0
      ruby/tests/basic.rb

+ 5 - 7
ruby/README.md

@@ -18,13 +18,7 @@ To build this Ruby extension, you will need:
 * a C compiler
 * the upb submodule
 
-First, ensure that upb/ is checked out:
-
-    $ cd ..   # top level protobuf directory
-    $ git submodule init
-    $ git submodule update
-
-Then install the required Ruby gems:
+First, install the required Ruby gems:
 
     $ sudo gem install bundler rake rake-compiler rspec rubygems-tasks
 
@@ -32,3 +26,7 @@ Then build the Gem:
 
     $ rake gem
     $ gem install pkg/protobuf-$VERSION.gem
+
+This gem includes the upb parsing and serialization library as a single-file
+amalgamation. It is up-to-date with upb git commit
+`535bc2fe2f2b467f59347ffc9449e11e47791257`.

+ 526 - 103
ruby/ext/google/protobuf_c/upb.c

@@ -210,6 +210,21 @@ static bool upb_validate_field(upb_fielddef *f, upb_status *s) {
     upb_fielddef_setdefaultint32(f, upb_fielddef_defaultint32(f));
   }
 
+  // Ensure that MapEntry submessages only appear as repeated fields, not
+  // optional/required (singular) fields.
+  if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
+      upb_fielddef_msgsubdef(f) != NULL) {
+    const upb_msgdef *subdef = upb_fielddef_msgsubdef(f);
+    if (upb_msgdef_mapentry(subdef) && !upb_fielddef_isseq(f)) {
+      upb_status_seterrf(s,
+                         "Field %s refers to mapentry message but is not "
+                         "a repeated field",
+                         upb_fielddef_name(f) ? upb_fielddef_name(f) :
+                         "(unnamed)");
+      return false;
+    }
+  }
+
   return true;
 }
 
@@ -1242,6 +1257,11 @@ bool upb_fielddef_isprimitive(const upb_fielddef *f) {
   return !upb_fielddef_isstring(f) && !upb_fielddef_issubmsg(f);
 }
 
+bool upb_fielddef_ismap(const upb_fielddef *f) {
+  return upb_fielddef_isseq(f) && upb_fielddef_issubmsg(f) &&
+         upb_msgdef_mapentry(upb_fielddef_msgsubdef(f));
+}
+
 bool upb_fielddef_hassubdef(const upb_fielddef *f) {
   return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM;
 }
@@ -3773,12 +3793,16 @@ char *upb_strdup(const char *s) {
 }
 
 char *upb_strdup2(const char *s, size_t len) {
+  // Prevent overflow errors.
+  if (len == SIZE_MAX) return NULL;
   // Always null-terminate, even if binary data; but don't rely on the input to
   // have a null-terminating byte since it may be a raw binary buffer.
   size_t n = len + 1;
   char *p = malloc(n);
-  if (p) memcpy(p, s, len);
-  p[len] = 0;
+  if (p) {
+    memcpy(p, s, len);
+    p[len] = 0;
+  }
   return p;
 }
 
@@ -9503,11 +9527,17 @@ static void start_number(upb_json_parser *p, const char *ptr) {
   capture_begin(p, ptr);
 }
 
+static bool parse_number(upb_json_parser *p);
+
 static bool end_number(upb_json_parser *p, const char *ptr) {
   if (!capture_end(p, ptr)) {
     return false;
   }
 
+  return parse_number(p);
+}
+
+static bool parse_number(upb_json_parser *p) {
   // strtol() and friends unfortunately do not support specifying the length of
   // the input string, so we need to force a copy into a NULL-terminated buffer.
   if (!multipart_text(p, "\0", 1, false)) {
@@ -9517,8 +9547,8 @@ static bool end_number(upb_json_parser *p, const char *ptr) {
   size_t len;
   const char *buf = accumulate_getptr(p, &len);
   const char *myend = buf + len - 1;  // One for NULL.
-  char *end;
 
+  char *end;
   switch (upb_fielddef_type(p->top->f)) {
     case UPB_TYPE_ENUM:
     case UPB_TYPE_INT32: {
@@ -9574,6 +9604,7 @@ static bool end_number(upb_json_parser *p, const char *ptr) {
   }
 
   multipart_end(p);
+
   return true;
 
 err:
@@ -9592,6 +9623,7 @@ static bool parser_putbool(upb_json_parser *p, bool val) {
 
   bool ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
   UPB_ASSERT_VAR(ok, ok);
+
   return true;
 }
 
@@ -9608,6 +9640,8 @@ static bool start_stringval(upb_json_parser *p) {
     upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
     inner->m = p->top->m;
     inner->f = p->top->f;
+    inner->is_map = false;
+    inner->is_mapentry = false;
     p->top = inner;
 
     if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
@@ -9685,6 +9719,7 @@ static bool end_stringval(upb_json_parser *p) {
   }
 
   multipart_end(p);
+
   return ok;
 }
 
@@ -9693,54 +9728,217 @@ static void start_member(upb_json_parser *p) {
   multipart_startaccum(p);
 }
 
-static bool end_member(upb_json_parser *p) {
-  assert(!p->top->f);
+// Helper: invoked during parse_mapentry() to emit the mapentry message's key
+// field based on the current contents of the accumulate buffer.
+static bool parse_mapentry_key(upb_json_parser *p) {
+
   size_t len;
   const char *buf = accumulate_getptr(p, &len);
 
-  const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len);
+  // Emit the key field. We do a bit of ad-hoc parsing here because the
+  // parser state machine has already decided that this is a string field
+  // name, and we are reinterpreting it as some arbitrary key type. In
+  // particular, integer and bool keys are quoted, so we need to parse the
+  // quoted string contents here.
 
-  if (!f) {
-    // TODO(haberman): Ignore unknown fields if requested/configured to do so.
-    upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf);
+  p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY);
+  if (p->top->f == NULL) {
+    upb_status_seterrmsg(p->status, "mapentry message has no key");
     return false;
   }
+  switch (upb_fielddef_type(p->top->f)) {
+    case UPB_TYPE_INT32:
+    case UPB_TYPE_INT64:
+    case UPB_TYPE_UINT32:
+    case UPB_TYPE_UINT64:
+      // Invoke end_number. The accum buffer has the number's text already.
+      if (!parse_number(p)) {
+        return false;
+      }
+      break;
+    case UPB_TYPE_BOOL:
+      if (len == 4 && !strncmp(buf, "true", 4)) {
+        if (!parser_putbool(p, true)) {
+          return false;
+        }
+      } else if (len == 5 && !strncmp(buf, "false", 5)) {
+        if (!parser_putbool(p, false)) {
+          return false;
+        }
+      } else {
+        upb_status_seterrmsg(p->status,
+                             "Map bool key not 'true' or 'false'");
+        return false;
+      }
+      multipart_end(p);
+      break;
+    case UPB_TYPE_STRING:
+    case UPB_TYPE_BYTES: {
+      upb_sink subsink;
+      upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
+      upb_sink_startstr(&p->top->sink, sel, len, &subsink);
+      sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
+      upb_sink_putstring(&subsink, sel, buf, len, NULL);
+      sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
+      upb_sink_endstr(&subsink, sel);
+      multipart_end(p);
+      break;
+    }
+    default:
+      upb_status_seterrmsg(p->status, "Invalid field type for map key");
+      return false;
+  }
 
-  p->top->f = f;
-  multipart_end(p);
+  return true;
+}
+
+// Helper: emit one map entry (as a submessage in the map field sequence). This
+// is invoked from end_membername(), at the end of the map entry's key string,
+// with the map key in the accumulate buffer. It parses the key from that
+// buffer, emits the handler calls to start the mapentry submessage (setting up
+// its subframe in the process), and sets up state in the subframe so that the
+// value parser (invoked next) will emit the mapentry's value field and then
+// end the mapentry message.
+
+static bool handle_mapentry(upb_json_parser *p) {
+  // Map entry: p->top->sink is the seq frame, so we need to start a frame
+  // for the mapentry itself, and then set |f| in that frame so that the map
+  // value field is parsed, and also set a flag to end the frame after the
+  // map-entry value is parsed.
+  if (!check_stack(p)) return false;
+
+  const upb_fielddef *mapfield = p->top->mapfield;
+  const upb_msgdef *mapentrymsg = upb_fielddef_msgsubdef(mapfield);
+
+  upb_jsonparser_frame *inner = p->top + 1;
+  p->top->f = mapfield;
+  upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
+  upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
+  inner->m = mapentrymsg;
+  inner->mapfield = mapfield;
+  inner->is_map = false;
+
+  // Don't set this to true *yet* -- we reuse parsing handlers below to push
+  // the key field value to the sink, and these handlers will pop the frame
+  // if they see is_mapentry (when invoked by the parser state machine, they
+  // would have just seen the map-entry value, not key).
+  inner->is_mapentry = false;
+  p->top = inner;
+
+  // send STARTMSG in submsg frame.
+  upb_sink_startmsg(&p->top->sink);
+
+  parse_mapentry_key(p);
+
+  // Set up the value field to receive the map-entry value.
+  p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE);
+  p->top->is_mapentry = true;  // set up to pop frame after value is parsed.
+  p->top->mapfield = mapfield;
+  if (p->top->f == NULL) {
+    upb_status_seterrmsg(p->status, "mapentry message has no value");
+    return false;
+  }
 
   return true;
 }
 
-static void clear_member(upb_json_parser *p) { p->top->f = NULL; }
+static bool end_membername(upb_json_parser *p) {
+  assert(!p->top->f);
+
+  if (p->top->is_map) {
+    return handle_mapentry(p);
+  } else {
+    size_t len;
+    const char *buf = accumulate_getptr(p, &len);
+    const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len);
+
+    if (!f) {
+      // TODO(haberman): Ignore unknown fields if requested/configured to do so.
+      upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf);
+      return false;
+    }
+
+    p->top->f = f;
+    multipart_end(p);
+
+    return true;
+  }
+}
+
+static void end_member(upb_json_parser *p) {
+  // If we just parsed a map-entry value, end that frame too.
+  if (p->top->is_mapentry) {
+    assert(p->top > p->stack);
+    // send ENDMSG on submsg.
+    upb_status s = UPB_STATUS_INIT;
+    upb_sink_endmsg(&p->top->sink, &s);
+    const upb_fielddef* mapfield = p->top->mapfield;
+
+    // send ENDSUBMSG in repeated-field-of-mapentries frame.
+    p->top--;
+    upb_selector_t sel;
+    bool ok = upb_handlers_getselector(mapfield,
+                                       UPB_HANDLER_ENDSUBMSG, &sel);
+    UPB_ASSERT_VAR(ok, ok);
+    upb_sink_endsubmsg(&p->top->sink, sel);
+  }
+
+  p->top->f = NULL;
+}
 
 static bool start_subobject(upb_json_parser *p) {
   assert(p->top->f);
 
-  if (!upb_fielddef_issubmsg(p->top->f)) {
+  if (upb_fielddef_ismap(p->top->f)) {
+    // Beginning of a map. Start a new parser frame in a repeated-field
+    // context.
+    if (!check_stack(p)) return false;
+
+    upb_jsonparser_frame *inner = p->top + 1;
+    upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
+    upb_sink_startseq(&p->top->sink, sel, &inner->sink);
+    inner->m = upb_fielddef_msgsubdef(p->top->f);
+    inner->mapfield = p->top->f;
+    inner->f = NULL;
+    inner->is_map = true;
+    inner->is_mapentry = false;
+    p->top = inner;
+
+    return true;
+  } else if (upb_fielddef_issubmsg(p->top->f)) {
+    // Beginning of a subobject. Start a new parser frame in the submsg
+    // context.
+    if (!check_stack(p)) return false;
+
+    upb_jsonparser_frame *inner = p->top + 1;
+
+    upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
+    upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
+    inner->m = upb_fielddef_msgsubdef(p->top->f);
+    inner->f = NULL;
+    inner->is_map = false;
+    inner->is_mapentry = false;
+    p->top = inner;
+
+    return true;
+  } else {
     upb_status_seterrf(p->status,
                        "Object specified for non-message/group field: %s",
                        upb_fielddef_name(p->top->f));
     return false;
   }
-
-  if (!check_stack(p)) return false;
-
-  upb_jsonparser_frame *inner = p->top + 1;
-
-  upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
-  upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
-  inner->m = upb_fielddef_msgsubdef(p->top->f);
-  inner->f = NULL;
-  p->top = inner;
-
-  return true;
 }
 
 static void end_subobject(upb_json_parser *p) {
-  p->top--;
-  upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
-  upb_sink_endsubmsg(&p->top->sink, sel);
+  if (p->top->is_map) {
+    p->top--;
+    upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
+    upb_sink_endseq(&p->top->sink, sel);
+  } else {
+    p->top--;
+    upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
+    upb_sink_endsubmsg(&p->top->sink, sel);
+  }
 }
 
 static bool start_array(upb_json_parser *p) {
@@ -9760,6 +9958,8 @@ static bool start_array(upb_json_parser *p) {
   upb_sink_startseq(&p->top->sink, sel, &inner->sink);
   inner->m = p->top->m;
   inner->f = p->top->f;
+  inner->is_map = false;
+  inner->is_mapentry = false;
   p->top = inner;
 
   return true;
@@ -9774,12 +9974,16 @@ static void end_array(upb_json_parser *p) {
 }
 
 static void start_object(upb_json_parser *p) {
-  upb_sink_startmsg(&p->top->sink);
+  if (!p->top->is_map) {
+    upb_sink_startmsg(&p->top->sink);
+  }
 }
 
 static void end_object(upb_json_parser *p) {
-  upb_status status;
-  upb_sink_endmsg(&p->top->sink, &status);
+  if (!p->top->is_map) {
+    upb_status status;
+    upb_sink_endmsg(&p->top->sink, &status);
+  }
 }
 
 
@@ -9804,11 +10008,11 @@ static void end_object(upb_json_parser *p) {
 // final state once, when the closing '"' is seen.
 
 
-#line 905 "upb/json/parser.rl"
+#line 1085 "upb/json/parser.rl"
 
 
 
-#line 817 "upb/json/parser.c"
+#line 997 "upb/json/parser.c"
 static const char _json_actions[] = {
 	0, 1, 0, 1, 2, 1, 3, 1, 
 	5, 1, 6, 1, 7, 1, 8, 1, 
@@ -9959,7 +10163,7 @@ static const int json_en_value_machine = 27;
 static const int json_en_main = 1;
 
 
-#line 908 "upb/json/parser.rl"
+#line 1088 "upb/json/parser.rl"
 
 size_t parse(void *closure, const void *hd, const char *buf, size_t size,
              const upb_bufhandle *handle) {
@@ -9979,7 +10183,7 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size,
   capture_resume(parser, buf);
 
   
-#line 988 "upb/json/parser.c"
+#line 1168 "upb/json/parser.c"
 	{
 	int _klen;
 	unsigned int _trans;
@@ -10054,118 +10258,118 @@ _match:
 		switch ( *_acts++ )
 		{
 	case 0:
-#line 820 "upb/json/parser.rl"
+#line 1000 "upb/json/parser.rl"
 	{ p--; {cs = stack[--top]; goto _again;} }
 	break;
 	case 1:
-#line 821 "upb/json/parser.rl"
+#line 1001 "upb/json/parser.rl"
 	{ p--; {stack[top++] = cs; cs = 10; goto _again;} }
 	break;
 	case 2:
-#line 825 "upb/json/parser.rl"
+#line 1005 "upb/json/parser.rl"
 	{ start_text(parser, p); }
 	break;
 	case 3:
-#line 826 "upb/json/parser.rl"
+#line 1006 "upb/json/parser.rl"
 	{ CHECK_RETURN_TOP(end_text(parser, p)); }
 	break;
 	case 4:
-#line 832 "upb/json/parser.rl"
+#line 1012 "upb/json/parser.rl"
 	{ start_hex(parser); }
 	break;
 	case 5:
-#line 833 "upb/json/parser.rl"
+#line 1013 "upb/json/parser.rl"
 	{ hexdigit(parser, p); }
 	break;
 	case 6:
-#line 834 "upb/json/parser.rl"
+#line 1014 "upb/json/parser.rl"
 	{ CHECK_RETURN_TOP(end_hex(parser)); }
 	break;
 	case 7:
-#line 840 "upb/json/parser.rl"
+#line 1020 "upb/json/parser.rl"
 	{ CHECK_RETURN_TOP(escape(parser, p)); }
 	break;
 	case 8:
-#line 846 "upb/json/parser.rl"
+#line 1026 "upb/json/parser.rl"
 	{ p--; {cs = stack[--top]; goto _again;} }
 	break;
 	case 9:
-#line 849 "upb/json/parser.rl"
+#line 1029 "upb/json/parser.rl"
 	{ {stack[top++] = cs; cs = 19; goto _again;} }
 	break;
 	case 10:
-#line 851 "upb/json/parser.rl"
+#line 1031 "upb/json/parser.rl"
 	{ p--; {stack[top++] = cs; cs = 27; goto _again;} }
 	break;
 	case 11:
-#line 856 "upb/json/parser.rl"
+#line 1036 "upb/json/parser.rl"
 	{ start_member(parser); }
 	break;
 	case 12:
-#line 857 "upb/json/parser.rl"
-	{ CHECK_RETURN_TOP(end_member(parser)); }
+#line 1037 "upb/json/parser.rl"
+	{ CHECK_RETURN_TOP(end_membername(parser)); }
 	break;
 	case 13:
-#line 860 "upb/json/parser.rl"
-	{ clear_member(parser); }
+#line 1040 "upb/json/parser.rl"
+	{ end_member(parser); }
 	break;
 	case 14:
-#line 866 "upb/json/parser.rl"
+#line 1046 "upb/json/parser.rl"
 	{ start_object(parser); }
 	break;
 	case 15:
-#line 869 "upb/json/parser.rl"
+#line 1049 "upb/json/parser.rl"
 	{ end_object(parser); }
 	break;
 	case 16:
-#line 875 "upb/json/parser.rl"
+#line 1055 "upb/json/parser.rl"
 	{ CHECK_RETURN_TOP(start_array(parser)); }
 	break;
 	case 17:
-#line 879 "upb/json/parser.rl"
+#line 1059 "upb/json/parser.rl"
 	{ end_array(parser); }
 	break;
 	case 18:
-#line 884 "upb/json/parser.rl"
+#line 1064 "upb/json/parser.rl"
 	{ start_number(parser, p); }
 	break;
 	case 19:
-#line 885 "upb/json/parser.rl"
+#line 1065 "upb/json/parser.rl"
 	{ CHECK_RETURN_TOP(end_number(parser, p)); }
 	break;
 	case 20:
-#line 887 "upb/json/parser.rl"
+#line 1067 "upb/json/parser.rl"
 	{ CHECK_RETURN_TOP(start_stringval(parser)); }
 	break;
 	case 21:
-#line 888 "upb/json/parser.rl"
+#line 1068 "upb/json/parser.rl"
 	{ CHECK_RETURN_TOP(end_stringval(parser)); }
 	break;
 	case 22:
-#line 890 "upb/json/parser.rl"
+#line 1070 "upb/json/parser.rl"
 	{ CHECK_RETURN_TOP(parser_putbool(parser, true)); }
 	break;
 	case 23:
-#line 892 "upb/json/parser.rl"
+#line 1072 "upb/json/parser.rl"
 	{ CHECK_RETURN_TOP(parser_putbool(parser, false)); }
 	break;
 	case 24:
-#line 894 "upb/json/parser.rl"
+#line 1074 "upb/json/parser.rl"
 	{ /* null value */ }
 	break;
 	case 25:
-#line 896 "upb/json/parser.rl"
+#line 1076 "upb/json/parser.rl"
 	{ CHECK_RETURN_TOP(start_subobject(parser)); }
 	break;
 	case 26:
-#line 897 "upb/json/parser.rl"
+#line 1077 "upb/json/parser.rl"
 	{ end_subobject(parser); }
 	break;
 	case 27:
-#line 902 "upb/json/parser.rl"
+#line 1082 "upb/json/parser.rl"
 	{ p--; {cs = stack[--top]; goto _again;} }
 	break;
-#line 1174 "upb/json/parser.c"
+#line 1354 "upb/json/parser.c"
 		}
 	}
 
@@ -10178,7 +10382,7 @@ _again:
 	_out: {}
 	}
 
-#line 927 "upb/json/parser.rl"
+#line 1107 "upb/json/parser.rl"
 
   if (p != pe) {
     upb_status_seterrf(parser->status, "Parse error at %s\n", p);
@@ -10222,18 +10426,20 @@ void upb_json_parser_uninit(upb_json_parser *p) {
 void upb_json_parser_reset(upb_json_parser *p) {
   p->top = p->stack;
   p->top->f = NULL;
+  p->top->is_map = false;
+  p->top->is_mapentry = false;
 
   int cs;
   int top;
   // Emit Ragel initialization of the parser.
   
-#line 1236 "upb/json/parser.c"
+#line 1418 "upb/json/parser.c"
 	{
 	cs = json_start;
 	top = 0;
 	}
 
-#line 975 "upb/json/parser.rl"
+#line 1157 "upb/json/parser.rl"
   p->current_state = cs;
   p->parser_top = top;
   accumulate_clear(p);
@@ -10434,13 +10640,23 @@ static bool putkey(void *closure, const void *handler_data) {
     return true;                                                             \
   }                                                                          \
   static bool repeated_##type(void *closure, const void *handler_data,       \
-                            type val) {                                      \
+                              type val) {                                    \
     upb_json_printer *p = closure;                                           \
     print_comma(p);                                                          \
     CHK(put##type(closure, handler_data, val));                              \
     return true;                                                             \
   }
 
+#define TYPE_HANDLERS_MAPKEY(type, fmt_func)                                 \
+  static bool putmapkey_##type(void *closure, const void *handler_data,      \
+                            type val) {                                      \
+    upb_json_printer *p = closure;                                           \
+    print_data(p, "\"", 1);                                                  \
+    CHK(put##type(closure, handler_data, val));                              \
+    print_data(p, "\":", 2);                                                 \
+    return true;                                                             \
+  }
+
 TYPE_HANDLERS(double,   fmt_double);
 TYPE_HANDLERS(float,    fmt_float);
 TYPE_HANDLERS(bool,     fmt_bool);
@@ -10449,7 +10665,15 @@ TYPE_HANDLERS(uint32_t, fmt_int64);
 TYPE_HANDLERS(int64_t,  fmt_int64);
 TYPE_HANDLERS(uint64_t, fmt_uint64);
 
+// double and float are not allowed to be map keys.
+TYPE_HANDLERS_MAPKEY(bool,     fmt_bool);
+TYPE_HANDLERS_MAPKEY(int32_t,  fmt_int64);
+TYPE_HANDLERS_MAPKEY(uint32_t, fmt_int64);
+TYPE_HANDLERS_MAPKEY(int64_t,  fmt_int64);
+TYPE_HANDLERS_MAPKEY(uint64_t, fmt_uint64);
+
 #undef TYPE_HANDLERS
+#undef TYPE_HANDLERS_MAPKEY
 
 typedef struct {
   void *keyname;
@@ -10474,20 +10698,36 @@ static bool scalar_enum(void *closure, const void *handler_data,
   return true;
 }
 
-static bool repeated_enum(void *closure, const void *handler_data,
-                          int32_t val) {
-  const EnumHandlerData *hd = handler_data;
-  upb_json_printer *p = closure;
-  print_comma(p);
-
-  const char *symbolic_name = upb_enumdef_iton(hd->enumdef, val);
+static void print_enum_symbolic_name(upb_json_printer *p,
+                                     const upb_enumdef *def,
+                                     int32_t val) {
+  const char *symbolic_name = upb_enumdef_iton(def, val);
   if (symbolic_name) {
     print_data(p, "\"", 1);
     putstring(p, symbolic_name, strlen(symbolic_name));
     print_data(p, "\"", 1);
   } else {
-    putint32_t(closure, NULL, val);
+    putint32_t(p, NULL, val);
   }
+}
+
+static bool repeated_enum(void *closure, const void *handler_data,
+                          int32_t val) {
+  const EnumHandlerData *hd = handler_data;
+  upb_json_printer *p = closure;
+  print_comma(p);
+
+  print_enum_symbolic_name(p, hd->enumdef, val);
+
+  return true;
+}
+
+static bool mapvalue_enum(void *closure, const void *handler_data,
+                          int32_t val) {
+  const EnumHandlerData *hd = handler_data;
+  upb_json_printer *p = closure;
+
+  print_enum_symbolic_name(p, hd->enumdef, val);
 
   return true;
 }
@@ -10503,25 +10743,35 @@ static void *repeated_startsubmsg(void *closure, const void *handler_data) {
   return closure;
 }
 
-static bool startmap(void *closure, const void *handler_data) {
+static void start_frame(upb_json_printer *p) {
+  p->depth_++;
+  p->first_elem_[p->depth_] = true;
+  print_data(p, "{", 1);
+}
+
+static void end_frame(upb_json_printer *p) {
+  print_data(p, "}", 1);
+  p->depth_--;
+}
+
+static bool printer_startmsg(void *closure, const void *handler_data) {
   UPB_UNUSED(handler_data);
   upb_json_printer *p = closure;
-  if (p->depth_++ == 0) {
+  if (p->depth_ == 0) {
     upb_bytessink_start(p->output_, 0, &p->subc_);
   }
-  p->first_elem_[p->depth_] = true;
-  print_data(p, "{", 1);
+  start_frame(p);
   return true;
 }
 
-static bool endmap(void *closure, const void *handler_data, upb_status *s) {
+static bool printer_endmsg(void *closure, const void *handler_data, upb_status *s) {
   UPB_UNUSED(handler_data);
   UPB_UNUSED(s);
   upb_json_printer *p = closure;
-  if (--p->depth_ == 0) {
+  end_frame(p);
+  if (p->depth_ == 0) {
     upb_bytessink_end(p->output_);
   }
-  print_data(p, "}", 1);
   return true;
 }
 
@@ -10542,6 +10792,23 @@ static bool endseq(void *closure, const void *handler_data) {
   return true;
 }
 
+static void *startmap(void *closure, const void *handler_data) {
+  upb_json_printer *p = closure;
+  CHK(putkey(closure, handler_data));
+  p->depth_++;
+  p->first_elem_[p->depth_] = true;
+  print_data(p, "{", 1);
+  return closure;
+}
+
+static bool endmap(void *closure, const void *handler_data) {
+  UPB_UNUSED(handler_data);
+  upb_json_printer *p = closure;
+  print_data(p, "}", 1);
+  p->depth_--;
+  return true;
+}
+
 static size_t putstr(void *closure, const void *handler_data, const char *str,
                      size_t len, const upb_bufhandle *handle) {
   UPB_UNUSED(handler_data);
@@ -10656,6 +10923,36 @@ static bool repeated_endstr(void *closure, const void *handler_data) {
   return true;
 }
 
+static void *mapkeyval_startstr(void *closure, const void *handler_data,
+                                size_t size_hint) {
+  UPB_UNUSED(handler_data);
+  UPB_UNUSED(size_hint);
+  upb_json_printer *p = closure;
+  print_data(p, "\"", 1);
+  return p;
+}
+
+static size_t mapkey_str(void *closure, const void *handler_data,
+                         const char *str, size_t len,
+                         const upb_bufhandle *handle) {
+  CHK(putstr(closure, handler_data, str, len, handle));
+  return len;
+}
+
+static bool mapkey_endstr(void *closure, const void *handler_data) {
+  UPB_UNUSED(handler_data);
+  upb_json_printer *p = closure;
+  print_data(p, "\":", 2);
+  return true;
+}
+
+static bool mapvalue_endstr(void *closure, const void *handler_data) {
+  UPB_UNUSED(handler_data);
+  upb_json_printer *p = closure;
+  print_data(p, "\"", 1);
+  return true;
+}
+
 static size_t scalar_bytes(void *closure, const void *handler_data,
                            const char *str, size_t len,
                            const upb_bufhandle *handle) {
@@ -10673,31 +10970,161 @@ static size_t repeated_bytes(void *closure, const void *handler_data,
   return len;
 }
 
-void printer_sethandlers(const void *closure, upb_handlers *h) {
+static size_t mapkey_bytes(void *closure, const void *handler_data,
+                           const char *str, size_t len,
+                           const upb_bufhandle *handle) {
+  upb_json_printer *p = closure;
+  CHK(putbytes(closure, handler_data, str, len, handle));
+  print_data(p, ":", 1);
+  return len;
+}
+
+static void set_enum_hd(upb_handlers *h,
+                        const upb_fielddef *f,
+                        upb_handlerattr *attr) {
+  EnumHandlerData *hd = malloc(sizeof(EnumHandlerData));
+  hd->enumdef = (const upb_enumdef *)upb_fielddef_subdef(f);
+  hd->keyname = newstrpc(h, f);
+  upb_handlers_addcleanup(h, hd, free);
+  upb_handlerattr_sethandlerdata(attr, hd);
+}
+
+// Set up handlers for a mapentry submessage (i.e., an individual key/value pair
+// in a map).
+//
+// TODO: Handle missing key, missing value, out-of-order key/value, or repeated
+// key or value cases properly. The right way to do this is to allocate a
+// temporary structure at the start of a mapentry submessage, store key and
+// value data in it as key and value handlers are called, and then print the
+// key/value pair once at the end of the submessage. If we don't do this, we
+// should at least detect the case and throw an error. However, so far all of
+// our sources that emit mapentry messages do so canonically (with one key
+// field, and then one value field), so this is not a pressing concern at the
+// moment.
+void printer_sethandlers_mapentry(const void *closure, upb_handlers *h) {
   UPB_UNUSED(closure);
+  const upb_msgdef *md = upb_handlers_msgdef(h);
+
+  // A mapentry message is printed simply as '"key": value'. Rather than
+  // special-case key and value for every type below, we just handle both
+  // fields explicitly here.
+  const upb_fielddef* key_field = upb_msgdef_itof(md, UPB_MAPENTRY_KEY);
+  const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_MAPENTRY_VALUE);
+
+  upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
+
+  switch (upb_fielddef_type(key_field)) {
+    case UPB_TYPE_INT32:
+      upb_handlers_setint32(h, key_field, putmapkey_int32_t, &empty_attr);
+      break;
+    case UPB_TYPE_INT64:
+      upb_handlers_setint64(h, key_field, putmapkey_int64_t, &empty_attr);
+      break;
+    case UPB_TYPE_UINT32:
+      upb_handlers_setuint32(h, key_field, putmapkey_uint32_t, &empty_attr);
+      break;
+    case UPB_TYPE_UINT64:
+      upb_handlers_setuint64(h, key_field, putmapkey_uint64_t, &empty_attr);
+      break;
+    case UPB_TYPE_BOOL:
+      upb_handlers_setbool(h, key_field, putmapkey_bool, &empty_attr);
+      break;
+    case UPB_TYPE_STRING:
+      upb_handlers_setstartstr(h, key_field, mapkeyval_startstr, &empty_attr);
+      upb_handlers_setstring(h, key_field, mapkey_str, &empty_attr);
+      upb_handlers_setendstr(h, key_field, mapkey_endstr, &empty_attr);
+      break;
+    case UPB_TYPE_BYTES:
+      upb_handlers_setstring(h, key_field, mapkey_bytes, &empty_attr);
+      break;
+    default:
+      assert(false);
+      break;
+  }
+
+  switch (upb_fielddef_type(value_field)) {
+    case UPB_TYPE_INT32:
+      upb_handlers_setint32(h, value_field, putint32_t, &empty_attr);
+      break;
+    case UPB_TYPE_INT64:
+      upb_handlers_setint64(h, value_field, putint64_t, &empty_attr);
+      break;
+    case UPB_TYPE_UINT32:
+      upb_handlers_setuint32(h, value_field, putuint32_t, &empty_attr);
+      break;
+    case UPB_TYPE_UINT64:
+      upb_handlers_setuint64(h, value_field, putuint64_t, &empty_attr);
+      break;
+    case UPB_TYPE_BOOL:
+      upb_handlers_setbool(h, value_field, putbool, &empty_attr);
+      break;
+    case UPB_TYPE_FLOAT:
+      upb_handlers_setfloat(h, value_field, putfloat, &empty_attr);
+      break;
+    case UPB_TYPE_DOUBLE:
+      upb_handlers_setdouble(h, value_field, putdouble, &empty_attr);
+      break;
+    case UPB_TYPE_STRING:
+      upb_handlers_setstartstr(h, value_field, mapkeyval_startstr, &empty_attr);
+      upb_handlers_setstring(h, value_field, putstr, &empty_attr);
+      upb_handlers_setendstr(h, value_field, mapvalue_endstr, &empty_attr);
+      break;
+    case UPB_TYPE_BYTES:
+      upb_handlers_setstring(h, value_field, putbytes, &empty_attr);
+      break;
+    case UPB_TYPE_ENUM: {
+      upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
+      set_enum_hd(h, value_field, &enum_attr);
+      upb_handlers_setint32(h, value_field, mapvalue_enum, &enum_attr);
+      upb_handlerattr_uninit(&enum_attr);
+      break;
+    }
+    case UPB_TYPE_MESSAGE:
+      // No handler necessary -- the submsg handlers will print the message
+      // as appropriate.
+      break;
+  }
+
+  upb_handlerattr_uninit(&empty_attr);
+}
 
+void printer_sethandlers(const void *closure, upb_handlers *h) {
+  UPB_UNUSED(closure);
+  const upb_msgdef *md = upb_handlers_msgdef(h);
+  bool is_mapentry = upb_msgdef_mapentry(md);
   upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
-  upb_handlers_setstartmsg(h, startmap, &empty_attr);
-  upb_handlers_setendmsg(h, endmap, &empty_attr);
-
-#define TYPE(type, name, ctype)                                    \
-  case type:                                                       \
-    if (upb_fielddef_isseq(f)) {                                   \
-      upb_handlers_set##name(h, f, repeated_##ctype, &empty_attr); \
-    } else {                                                       \
-      upb_handlers_set##name(h, f, scalar_##ctype, &name_attr);    \
-    }                                                              \
+
+  if (is_mapentry) {
+    // mapentry messages are sufficiently different that we handle them
+    // separately.
+    printer_sethandlers_mapentry(closure, h);
+    return;
+  }
+
+  upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr);
+  upb_handlers_setendmsg(h, printer_endmsg, &empty_attr);
+
+#define TYPE(type, name, ctype)                                               \
+  case type:                                                                  \
+    if (upb_fielddef_isseq(f)) {                                              \
+      upb_handlers_set##name(h, f, repeated_##ctype, &empty_attr);            \
+    } else {                                                                  \
+      upb_handlers_set##name(h, f, scalar_##ctype, &name_attr);               \
+    }                                                                         \
     break;
 
   upb_msg_field_iter i;
-  upb_msg_field_begin(&i, upb_handlers_msgdef(h));
+  upb_msg_field_begin(&i, md);
   for(; !upb_msg_field_done(&i); upb_msg_field_next(&i)) {
     const upb_fielddef *f = upb_msg_iter_field(&i);
 
     upb_handlerattr name_attr = UPB_HANDLERATTR_INITIALIZER;
     upb_handlerattr_sethandlerdata(&name_attr, newstrpc(h, f));
 
-    if (upb_fielddef_isseq(f)) {
+    if (upb_fielddef_ismap(f)) {
+      upb_handlers_setstartseq(h, f, startmap, &name_attr);
+      upb_handlers_setendseq(h, f, endmap, &name_attr);
+    } else if (upb_fielddef_isseq(f)) {
       upb_handlers_setstartseq(h, f, startseq, &name_attr);
       upb_handlers_setendseq(h, f, endseq, &empty_attr);
     }
@@ -10714,12 +11141,8 @@ void printer_sethandlers(const void *closure, upb_handlers *h) {
         // For now, we always emit symbolic names for enums. We may want an
         // option later to control this behavior, but we will wait for a real
         // need first.
-        EnumHandlerData *hd = malloc(sizeof(EnumHandlerData));
-        hd->enumdef = (const upb_enumdef *)upb_fielddef_subdef(f);
-        hd->keyname = newstrpc(h, f);
-        upb_handlers_addcleanup(h, hd, free);
         upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
-        upb_handlerattr_sethandlerdata(&enum_attr, hd);
+        set_enum_hd(h, f, &enum_attr);
 
         if (upb_fielddef_isseq(f)) {
           upb_handlers_setint32(h, f, repeated_enum, &enum_attr);

+ 26 - 1
ruby/ext/google/protobuf_c/upb.h

@@ -1466,6 +1466,7 @@ UPB_DEFINE_DEF(upb::FieldDef, fielddef, FIELD,
   bool IsString() const;
   bool IsSequence() const;
   bool IsPrimitive() const;
+  bool IsMap() const;
 
   // How integers are encoded.  Only meaningful for integer types.
   // Defaults to UPB_INTFMT_VARIABLE, and is reset when "type" changes.
@@ -1690,6 +1691,7 @@ bool upb_fielddef_issubmsg(const upb_fielddef *f);
 bool upb_fielddef_isstring(const upb_fielddef *f);
 bool upb_fielddef_isseq(const upb_fielddef *f);
 bool upb_fielddef_isprimitive(const upb_fielddef *f);
+bool upb_fielddef_ismap(const upb_fielddef *f);
 int64_t upb_fielddef_defaultint64(const upb_fielddef *f);
 int32_t upb_fielddef_defaultint32(const upb_fielddef *f);
 uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f);
@@ -2078,6 +2080,10 @@ UPB_INLINE upb_oneofdef *upb_msgdef_ntoo_mutable(upb_msgdef *m,
 void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry);
 bool upb_msgdef_mapentry(const upb_msgdef *m);
 
+// Well-known field tag numbers for map-entry messages.
+#define UPB_MAPENTRY_KEY   1
+#define UPB_MAPENTRY_VALUE 2
+
 const upb_oneofdef *upb_msgdef_findoneof(const upb_msgdef *m,
                                           const char *name);
 int upb_msgdef_numoneofs(const upb_msgdef *m);
@@ -2577,6 +2583,7 @@ inline bool FieldDef::IsSubMessage() const {
 }
 inline bool FieldDef::IsString() const { return upb_fielddef_isstring(this); }
 inline bool FieldDef::IsSequence() const { return upb_fielddef_isseq(this); }
+inline bool FieldDef::IsMap() const { return upb_fielddef_ismap(this); }
 inline int64_t FieldDef::default_int64() const {
   return upb_fielddef_defaultint64(this);
 }
@@ -7822,12 +7829,30 @@ class Parser;
 
 UPB_DECLARE_TYPE(upb::json::Parser, upb_json_parser);
 
-// Internal-only struct used by the parser.
+// Internal-only struct used by the parser. A parser frame corresponds
+// one-to-one with a handler (sink) frame.
 typedef struct {
  UPB_PRIVATE_FOR_CPP
   upb_sink sink;
+  // The current message in which we're parsing, and the field whose value we're
+  // expecting next.
   const upb_msgdef *m;
   const upb_fielddef *f;
+
+  // We are in a repeated-field context, ready to emit mapentries as
+  // submessages. This flag alters the start-of-object (open-brace) behavior to
+  // begin a sequence of mapentry messages rather than a single submessage.
+  bool is_map;
+  // We are in a map-entry message context. This flag is set when parsing the
+  // value field of a single map entry and indicates to all value-field parsers
+  // (subobjects, strings, numbers, and bools) that the map-entry submessage
+  // should end as soon as the value is parsed.
+  bool is_mapentry;
+  // If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent
+  // message's map field that we're currently parsing. This differs from |f|
+  // because |f| is the field in the *current* message (i.e., the map-entry
+  // message itself), not the parent's field that leads to this map.
+  const upb_fielddef *mapfield;
 } upb_jsonparser_frame;
 
 

+ 8 - 0
ruby/tests/basic.rb

@@ -992,5 +992,13 @@ module BasicTest
       m2 = TestMessage.decode_json(json_text)
       assert m == m2
     end
+
+    def test_json_maps
+      m = MapMessage.new(:map_string_int32 => {"a" => 1})
+      expected = '{"map_string_int32":{"a":1},"map_string_msg":{}}'
+      assert MapMessage.encode_json(m) == expected
+      m2 = MapMessage.decode_json(MapMessage.encode_json(m))
+      assert m == m2
+    end
   end
 end