Ver código fonte

Revert upb changes to minimize changes as we evaluate performance.

Joshua Haberman 5 anos atrás
pai
commit
05644f0313
2 arquivos alterados com 194 adições e 250 exclusões
  1. 161 187
      php/ext/google/protobuf/php-upb.c
  2. 33 63
      php/ext/google/protobuf/php-upb.h

+ 161 - 187
php/ext/google/protobuf/php-upb.c

@@ -81,10 +81,6 @@
 #define UPB_FORCEINLINE __inline__ __attribute__((always_inline))
 #define UPB_NOINLINE __attribute__((noinline))
 #define UPB_NORETURN __attribute__((__noreturn__))
-#elif defined(_MSC_VER)
-#define UPB_NOINLINE
-#define UPB_FORCEINLINE
-#define UPB_NORETURN __declspec(noreturn)
 #else  /* !defined(__GNUC__) */
 #define UPB_FORCEINLINE
 #define UPB_NOINLINE
@@ -147,7 +143,7 @@ int msvc_vsnprintf(char* s, size_t n, const char* format, va_list arg);
 #elif defined _MSC_VER
 #define UPB_ASSUME(expr) if (!(expr)) __assume(0)
 #else
-#define UPB_ASSUME(expr) do {} while (false && (expr))
+#define UPB_ASSUME(expr) do {} if (false && (expr))
 #endif
 #else
 #define UPB_ASSUME(expr) assert(expr)
@@ -327,6 +323,8 @@ typedef struct {
 
 typedef union {
   bool bool_val;
+  int32_t int32_val;
+  int64_t int64_val;
   uint32_t uint32_val;
   uint64_t uint64_val;
   upb_strview str_val;
@@ -426,21 +424,14 @@ static void decode_munge(int type, wireval *val) {
       break;
     case UPB_DESCRIPTOR_TYPE_SINT32: {
       uint32_t n = val->uint32_val;
-      val->uint32_val = (n >> 1) ^ -(int32_t)(n & 1);
+      val->int32_val = (n >> 1) ^ -(int32_t)(n & 1);
       break;
     }
     case UPB_DESCRIPTOR_TYPE_SINT64: {
       uint64_t n = val->uint64_val;
-      val->uint64_val = (n >> 1) ^ -(int64_t)(n & 1);
+      val->int64_val = (n >> 1) ^ -(int64_t)(n & 1);
       break;
     }
-    case UPB_DESCRIPTOR_TYPE_INT32:
-    case UPB_DESCRIPTOR_TYPE_UINT32:
-      if (!_upb_isle()) {
-        /* The next stage will memcpy(dst, &val, 4) */
-        val->uint32_val = val->uint64_val;
-      }
-      break;
   }
 }
 
@@ -616,7 +607,7 @@ static void decode_tomap(upb_decstate *d, upb_msg *msg,
   if (entry->fields[1].descriptortype == UPB_DESCRIPTOR_TYPE_MESSAGE ||
       entry->fields[1].descriptortype == UPB_DESCRIPTOR_TYPE_GROUP) {
     /* Create proactively to handle the case where it doesn't appear. */
-    ent.v.val = upb_value_ptr(_upb_msg_new(entry->submsgs[0], d->arena));
+    ent.v.val.val = (uint64_t)_upb_msg_new(entry->submsgs[0], d->arena);
   }
 
   decode_tosubmsg(d, &ent.k, layout, field, val.str_val);
@@ -707,16 +698,14 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
         break;
       case UPB_WIRE_TYPE_32BIT:
         if (d->limit - ptr < 4) decode_err(d);
-        memcpy(&val.uint32_val, ptr, 4);
-        val.uint32_val = _upb_be_swap32(val.uint32_val);
+        memcpy(&val, ptr, 4);
         ptr += 4;
         op = OP_SCALAR_LG2(2);
         if (((1 << field->descriptortype) & fixed32_ok) == 0) goto unknown;
         break;
       case UPB_WIRE_TYPE_64BIT:
         if (d->limit - ptr < 8) decode_err(d);
-        memcpy(&val.uint64_val, ptr, 8);
-        val.uint64_val = _upb_be_swap64(val.uint64_val);
+        memcpy(&val, ptr, 8);
         ptr += 8;
         op = OP_SCALAR_LG2(3);
         if (((1 << field->descriptortype) & fixed64_ok) == 0) goto unknown;
@@ -736,7 +725,7 @@ static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
         break;
       }
       case UPB_WIRE_TYPE_START_GROUP:
-        val.uint32_val = field_number;
+        val.int32_val = field_number;
         op = OP_SUBMSG;
         if (field->descriptortype != UPB_DTYPE_GROUP) goto unknown;
         break;
@@ -805,30 +794,30 @@ bool upb_decode(const char *buf, size_t size, void *msg, const upb_msglayout *l,
 /* We encode backwards, to avoid pre-computing lengths (one-pass encode). */
 
 
-#include <setjmp.h>
 #include <string.h>
 
 
 
 #define UPB_PB_VARINT_MAX_LEN 10
+#define CHK(x) do { if (!(x)) { return false; } } while(0)
 
-UPB_NOINLINE
-static size_t encode_varint64(uint64_t val, char *buf) {
-  size_t i = 0;
-  do {
+static size_t upb_encode_varint(uint64_t val, char *buf) {
+  size_t i;
+  if (val < 128) { buf[0] = val; return 1; }
+  i = 0;
+  while (val) {
     uint8_t byte = val & 0x7fU;
     val >>= 7;
     if (val) byte |= 0x80U;
     buf[i++] = byte;
-  } while (val);
+  }
   return i;
 }
 
-static uint32_t encode_zz32(int32_t n) { return ((uint32_t)n << 1) ^ (n >> 31); }
-static uint64_t encode_zz64(int64_t n) { return ((uint64_t)n << 1) ^ (n >> 63); }
+static uint32_t upb_zzencode_32(int32_t n) { return ((uint32_t)n << 1) ^ (n >> 31); }
+static uint64_t upb_zzencode_64(int64_t n) { return ((uint64_t)n << 1) ^ (n >> 63); }
 
 typedef struct {
-  jmp_buf err;
   upb_alloc *alloc;
   char *buf, *ptr, *limit;
 } upb_encstate;
@@ -841,15 +830,11 @@ static size_t upb_roundup_pow2(size_t bytes) {
   return ret;
 }
 
-UPB_NORETURN static void encode_err(upb_encstate *e) { longjmp(e->err, 1); }
-
-UPB_NOINLINE
-static void encode_growbuffer(upb_encstate *e, size_t bytes) {
+static bool upb_encode_growbuffer(upb_encstate *e, size_t bytes) {
   size_t old_size = e->limit - e->buf;
   size_t new_size = upb_roundup_pow2(bytes + (e->limit - e->ptr));
   char *new_buf = upb_realloc(e->alloc, e->buf, old_size, new_size);
-
-  if (!new_buf) encode_err(e);
+  CHK(new_buf);
 
   /* We want previous data at the end, realloc() put it at the beginning. */
   if (old_size > 0) {
@@ -859,115 +844,99 @@ static void encode_growbuffer(upb_encstate *e, size_t bytes) {
   e->ptr = new_buf + new_size - (e->limit - e->ptr);
   e->limit = new_buf + new_size;
   e->buf = new_buf;
-
-  e->ptr -= bytes;
+  return true;
 }
 
 /* Call to ensure that at least "bytes" bytes are available for writing at
  * e->ptr.  Returns false if the bytes could not be allocated. */
-UPB_FORCEINLINE
-static void encode_reserve(upb_encstate *e, size_t bytes) {
-  if ((size_t)(e->ptr - e->buf) < bytes) {
-    encode_growbuffer(e, bytes);
-    return;
-  }
+static bool upb_encode_reserve(upb_encstate *e, size_t bytes) {
+  CHK(UPB_LIKELY((size_t)(e->ptr - e->buf) >= bytes) ||
+      upb_encode_growbuffer(e, bytes));
 
   e->ptr -= bytes;
+  return true;
 }
 
 /* Writes the given bytes to the buffer, handling reserve/advance. */
-static void encode_bytes(upb_encstate *e, const void *data, size_t len) {
-  if (len == 0) return;  /* memcpy() with zero size is UB */
-  encode_reserve(e, len);
+static bool upb_put_bytes(upb_encstate *e, const void *data, size_t len) {
+  if (len == 0) return true;
+  CHK(upb_encode_reserve(e, len));
   memcpy(e->ptr, data, len);
+  return true;
 }
 
-static void encode_fixed64(upb_encstate *e, uint64_t val) {
-  val = _upb_be_swap64(val);
-  encode_bytes(e, &val, sizeof(uint64_t));
+static bool upb_put_fixed64(upb_encstate *e, uint64_t val) {
+  /* TODO(haberman): byte-swap for big endian. */
+  return upb_put_bytes(e, &val, sizeof(uint64_t));
 }
 
-static void encode_fixed32(upb_encstate *e, uint32_t val) {
-  val = _upb_be_swap32(val);
-  encode_bytes(e, &val, sizeof(uint32_t));
+static bool upb_put_fixed32(upb_encstate *e, uint32_t val) {
+  /* TODO(haberman): byte-swap for big endian. */
+  return upb_put_bytes(e, &val, sizeof(uint32_t));
 }
 
-UPB_NOINLINE
-static void encode_longvarint(upb_encstate *e, uint64_t val) {
+static bool upb_put_varint(upb_encstate *e, uint64_t val) {
   size_t len;
   char *start;
-
-  encode_reserve(e, UPB_PB_VARINT_MAX_LEN);
-  len = encode_varint64(val, e->ptr);
+  CHK(upb_encode_reserve(e, UPB_PB_VARINT_MAX_LEN));
+  len = upb_encode_varint(val, e->ptr);
   start = e->ptr + UPB_PB_VARINT_MAX_LEN - len;
   memmove(start, e->ptr, len);
   e->ptr = start;
+  return true;
 }
 
-UPB_FORCEINLINE
-static void encode_varint(upb_encstate *e, uint64_t val) {
-  if (val < 128 && e->ptr != e->buf) {
-    --e->ptr;
-    *e->ptr = val;
-  } else {
-    encode_longvarint(e, val);
-  }
-}
-
-static void encode_double(upb_encstate *e, double d) {
+static bool upb_put_double(upb_encstate *e, double d) {
   uint64_t u64;
   UPB_ASSERT(sizeof(double) == sizeof(uint64_t));
   memcpy(&u64, &d, sizeof(uint64_t));
-  encode_fixed64(e, u64);
+  return upb_put_fixed64(e, u64);
 }
 
-static void encode_float(upb_encstate *e, float d) {
+static bool upb_put_float(upb_encstate *e, float d) {
   uint32_t u32;
   UPB_ASSERT(sizeof(float) == sizeof(uint32_t));
   memcpy(&u32, &d, sizeof(uint32_t));
-  encode_fixed32(e, u32);
+  return upb_put_fixed32(e, u32);
 }
 
-static void encode_tag(upb_encstate *e, int field_number, int wire_type) {
-  encode_varint(e, (field_number << 3) | wire_type);
+static bool upb_put_tag(upb_encstate *e, int field_number, int wire_type) {
+  return upb_put_varint(e, (field_number << 3) | wire_type);
 }
 
-static void encode_fixedarray(upb_encstate *e, const upb_array *arr,
+static bool upb_put_fixedarray(upb_encstate *e, const upb_array *arr,
                                size_t elem_size, uint32_t tag) {
   size_t bytes = arr->len * elem_size;
   const char* data = _upb_array_constptr(arr);
   const char* ptr = data + bytes - elem_size;
   if (tag) {
     while (true) {
-      encode_bytes(e, ptr, elem_size);
-      encode_varint(e, tag);
+      CHK(upb_put_bytes(e, ptr, elem_size) && upb_put_varint(e, tag));
       if (ptr == data) break;
       ptr -= elem_size;
     }
+    return true;
   } else {
-    encode_bytes(e, data, bytes);
+    return upb_put_bytes(e, data, bytes) && upb_put_varint(e, bytes);
   }
 }
 
-static void encode_message(upb_encstate *e, const char *msg,
-                           const upb_msglayout *m, size_t *size);
+bool upb_encode_message(upb_encstate *e, const char *msg,
+                        const upb_msglayout *m, size_t *size);
 
-static void encode_scalar(upb_encstate *e, const void *_field_mem,
-                          const upb_msglayout *m, const upb_msglayout_field *f,
-                          bool skip_zero_value) {
+static bool upb_encode_scalarfield(upb_encstate *e, const void *_field_mem,
+                                   const upb_msglayout *m,
+                                   const upb_msglayout_field *f,
+                                   bool skip_zero_value) {
   const char *field_mem = _field_mem;
-  int wire_type;
-
-#define CASE(ctype, type, wtype, encodeval) \
-  {                                         \
-    ctype val = *(ctype *)field_mem;        \
-    if (skip_zero_value && val == 0) {      \
-      return;                               \
-    }                                       \
-    encode_##type(e, encodeval);            \
-    wire_type = wtype;                      \
-    break;                                  \
-  }
+#define CASE(ctype, type, wire_type, encodeval) do { \
+  ctype val = *(ctype*)field_mem; \
+  if (skip_zero_value && val == 0) { \
+    return true; \
+  } \
+  return upb_put_ ## type(e, encodeval) && \
+      upb_put_tag(e, f->number, wire_type); \
+} while(0)
 
   switch (f->descriptortype) {
     case UPB_DESCRIPTOR_TYPE_DOUBLE:
@@ -991,91 +960,90 @@ static void encode_scalar(upb_encstate *e, const void *_field_mem,
     case UPB_DESCRIPTOR_TYPE_BOOL:
       CASE(bool, varint, UPB_WIRE_TYPE_VARINT, val);
     case UPB_DESCRIPTOR_TYPE_SINT32:
-      CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, encode_zz32(val));
+      CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, upb_zzencode_32(val));
     case UPB_DESCRIPTOR_TYPE_SINT64:
-      CASE(int64_t, varint, UPB_WIRE_TYPE_VARINT, encode_zz64(val));
+      CASE(int64_t, varint, UPB_WIRE_TYPE_VARINT, upb_zzencode_64(val));
     case UPB_DESCRIPTOR_TYPE_STRING:
     case UPB_DESCRIPTOR_TYPE_BYTES: {
       upb_strview view = *(upb_strview*)field_mem;
       if (skip_zero_value && view.size == 0) {
-        return;
+        return true;
       }
-      encode_bytes(e, view.data, view.size);
-      encode_varint(e, view.size);
-      wire_type = UPB_WIRE_TYPE_DELIMITED;
-      break;
+      return upb_put_bytes(e, view.data, view.size) &&
+          upb_put_varint(e, view.size) &&
+          upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
     }
     case UPB_DESCRIPTOR_TYPE_GROUP: {
       size_t size;
       void *submsg = *(void **)field_mem;
       const upb_msglayout *subm = m->submsgs[f->submsg_index];
       if (submsg == NULL) {
-        return;
+        return true;
       }
-      encode_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP);
-      encode_message(e, submsg, subm, &size);
-      wire_type = UPB_WIRE_TYPE_START_GROUP;
-      break;
+      return upb_put_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP) &&
+          upb_encode_message(e, submsg, subm, &size) &&
+          upb_put_tag(e, f->number, UPB_WIRE_TYPE_START_GROUP);
     }
     case UPB_DESCRIPTOR_TYPE_MESSAGE: {
       size_t size;
       void *submsg = *(void **)field_mem;
       const upb_msglayout *subm = m->submsgs[f->submsg_index];
       if (submsg == NULL) {
-        return;
+        return true;
       }
-      encode_message(e, submsg, subm, &size);
-      encode_varint(e, size);
-      wire_type = UPB_WIRE_TYPE_DELIMITED;
-      break;
+      return upb_encode_message(e, submsg, subm, &size) &&
+          upb_put_varint(e, size) &&
+          upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
     }
-    default:
-      UPB_UNREACHABLE();
   }
 #undef CASE
-
-  encode_tag(e, f->number, wire_type);
+  UPB_UNREACHABLE();
 }
 
-static void encode_array(upb_encstate *e, const char *field_mem,
-                         const upb_msglayout *m, const upb_msglayout_field *f) {
+static bool upb_encode_array(upb_encstate *e, const char *field_mem,
+                             const upb_msglayout *m,
+                             const upb_msglayout_field *f) {
   const upb_array *arr = *(const upb_array**)field_mem;
   bool packed = f->label == _UPB_LABEL_PACKED;
-  size_t pre_len = e->limit - e->ptr;
 
   if (arr == NULL || arr->len == 0) {
-    return;
+    return true;
   }
 
 #define VARINT_CASE(ctype, encode)                                       \
   {                                                                      \
     const ctype *start = _upb_array_constptr(arr);                       \
     const ctype *ptr = start + arr->len;                                 \
+    size_t pre_len = e->limit - e->ptr;                                  \
     uint32_t tag = packed ? 0 : (f->number << 3) | UPB_WIRE_TYPE_VARINT; \
     do {                                                                 \
       ptr--;                                                             \
-      encode_varint(e, encode);                                          \
-      if (tag) encode_varint(e, tag);                                    \
+      CHK(upb_put_varint(e, encode));                                    \
+      if (tag) CHK(upb_put_varint(e, tag));                              \
     } while (ptr != start);                                              \
+    if (!tag) CHK(upb_put_varint(e, e->limit - e->ptr - pre_len));       \
   }                                                                      \
-  break;
+  break;                                                                 \
+  do {                                                                   \
+    ;                                                                    \
+  } while (0)
 
 #define TAG(wire_type) (packed ? 0 : (f->number << 3 | wire_type))
 
   switch (f->descriptortype) {
     case UPB_DESCRIPTOR_TYPE_DOUBLE:
-      encode_fixedarray(e, arr, sizeof(double), TAG(UPB_WIRE_TYPE_64BIT));
+      CHK(upb_put_fixedarray(e, arr, sizeof(double), TAG(UPB_WIRE_TYPE_64BIT)));
       break;
     case UPB_DESCRIPTOR_TYPE_FLOAT:
-      encode_fixedarray(e, arr, sizeof(float), TAG(UPB_WIRE_TYPE_32BIT));
+      CHK(upb_put_fixedarray(e, arr, sizeof(float), TAG(UPB_WIRE_TYPE_32BIT)));
       break;
     case UPB_DESCRIPTOR_TYPE_SFIXED64:
     case UPB_DESCRIPTOR_TYPE_FIXED64:
-      encode_fixedarray(e, arr, sizeof(uint64_t), TAG(UPB_WIRE_TYPE_64BIT));
+      CHK(upb_put_fixedarray(e, arr, sizeof(uint64_t), TAG(UPB_WIRE_TYPE_64BIT)));
       break;
     case UPB_DESCRIPTOR_TYPE_FIXED32:
     case UPB_DESCRIPTOR_TYPE_SFIXED32:
-      encode_fixedarray(e, arr, sizeof(uint32_t), TAG(UPB_WIRE_TYPE_32BIT));
+      CHK(upb_put_fixedarray(e, arr, sizeof(uint32_t), TAG(UPB_WIRE_TYPE_32BIT)));
       break;
     case UPB_DESCRIPTOR_TYPE_INT64:
     case UPB_DESCRIPTOR_TYPE_UINT64:
@@ -1088,20 +1056,20 @@ static void encode_array(upb_encstate *e, const char *field_mem,
     case UPB_DESCRIPTOR_TYPE_BOOL:
       VARINT_CASE(bool, *ptr);
     case UPB_DESCRIPTOR_TYPE_SINT32:
-      VARINT_CASE(int32_t, encode_zz32(*ptr));
+      VARINT_CASE(int32_t, upb_zzencode_32(*ptr));
     case UPB_DESCRIPTOR_TYPE_SINT64:
-      VARINT_CASE(int64_t, encode_zz64(*ptr));
+      VARINT_CASE(int64_t, upb_zzencode_64(*ptr));
     case UPB_DESCRIPTOR_TYPE_STRING:
     case UPB_DESCRIPTOR_TYPE_BYTES: {
       const upb_strview *start = _upb_array_constptr(arr);
       const upb_strview *ptr = start + arr->len;
       do {
         ptr--;
-        encode_bytes(e, ptr->data, ptr->size);
-        encode_varint(e, ptr->size);
-        encode_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
+        CHK(upb_put_bytes(e, ptr->data, ptr->size) &&
+            upb_put_varint(e, ptr->size) &&
+            upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));
       } while (ptr != start);
-      return;
+      return true;
     }
     case UPB_DESCRIPTOR_TYPE_GROUP: {
       const void *const*start = _upb_array_constptr(arr);
@@ -1110,11 +1078,11 @@ static void encode_array(upb_encstate *e, const char *field_mem,
       do {
         size_t size;
         ptr--;
-        encode_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP);
-        encode_message(e, *ptr, subm, &size);
-        encode_tag(e, f->number, UPB_WIRE_TYPE_START_GROUP);
+        CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP) &&
+            upb_encode_message(e, *ptr, subm, &size) &&
+            upb_put_tag(e, f->number, UPB_WIRE_TYPE_START_GROUP));
       } while (ptr != start);
-      return;
+      return true;
     }
     case UPB_DESCRIPTOR_TYPE_MESSAGE: {
       const void *const*start = _upb_array_constptr(arr);
@@ -1123,30 +1091,31 @@ static void encode_array(upb_encstate *e, const char *field_mem,
       do {
         size_t size;
         ptr--;
-        encode_message(e, *ptr, subm, &size);
-        encode_varint(e, size);
-        encode_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
+        CHK(upb_encode_message(e, *ptr, subm, &size) &&
+            upb_put_varint(e, size) &&
+            upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));
       } while (ptr != start);
-      return;
+      return true;
     }
   }
 #undef VARINT_CASE
 
   if (packed) {
-    encode_varint(e, e->limit - e->ptr - pre_len);
-    encode_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
+    CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));
   }
+  return true;
 }
 
-static void encode_map(upb_encstate *e, const char *field_mem,
-                       const upb_msglayout *m, const upb_msglayout_field *f) {
+static bool upb_encode_map(upb_encstate *e, const char *field_mem,
+                           const upb_msglayout *m,
+                           const upb_msglayout_field *f) {
   const upb_map *map = *(const upb_map**)field_mem;
   const upb_msglayout *entry = m->submsgs[f->submsg_index];
   const upb_msglayout_field *key_field = &entry->fields[0];
   const upb_msglayout_field *val_field = &entry->fields[1];
   upb_strtable_iter i;
   if (map == NULL) {
-    return;
+    return true;
   }
 
   upb_strtable_begin(&i, &map->table);
@@ -1158,57 +1127,59 @@ static void encode_map(upb_encstate *e, const char *field_mem,
     upb_map_entry ent;
     _upb_map_fromkey(key, &ent.k, map->key_size);
     _upb_map_fromvalue(val, &ent.v, map->val_size);
-    encode_scalar(e, &ent.v, entry, val_field, false);
-    encode_scalar(e, &ent.k, entry, key_field, false);
+    CHK(upb_encode_scalarfield(e, &ent.v, entry, val_field, false));
+    CHK(upb_encode_scalarfield(e, &ent.k, entry, key_field, false));
     size = (e->limit - e->ptr) - pre_len;
-    encode_varint(e, size);
-    encode_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
+    CHK(upb_put_varint(e, size));
+    CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));
   }
-}
 
-static void encode_scalarfield(upb_encstate *e, const char *msg,
-                               const upb_msglayout *m,
-                               const upb_msglayout_field *f) {
-  bool skip_empty = false;
-  if (f->presence == 0) {
-    /* Proto3 presence. */
-    skip_empty = true;
-  } else if (f->presence > 0) {
-    /* Proto2 presence: hasbit. */
-    if (!_upb_hasbit_field(msg, f)) return;
-  } else {
-    /* Field is in a oneof. */
-    if (_upb_getoneofcase_field(msg, f) != f->number) return;
-  }
-  encode_scalar(e, msg + f->offset, m, f, skip_empty);
+  return true;
 }
 
-static void encode_message(upb_encstate *e, const char *msg,
-                           const upb_msglayout *m, size_t *size) {
+
+bool upb_encode_message(upb_encstate *e, const char *msg,
+                        const upb_msglayout *m, size_t *size) {
+  int i;
   size_t pre_len = e->limit - e->ptr;
   const char *unknown;
   size_t unknown_size;
-  const upb_msglayout_field *f = &m->fields[m->field_count];
-  const upb_msglayout_field *first = &m->fields[0];
 
   unknown = upb_msg_getunknown(msg, &unknown_size);
 
   if (unknown) {
-    encode_bytes(e, unknown, unknown_size);
+    upb_put_bytes(e, unknown, unknown_size);
   }
 
-  while (f != first) {
-    f--;
+  for (i = m->field_count - 1; i >= 0; i--) {
+    const upb_msglayout_field *f = &m->fields[i];
+
     if (_upb_isrepeated(f)) {
-      encode_array(e, msg + f->offset, m, f);
+      CHK(upb_encode_array(e, msg + f->offset, m, f));
     } else if (f->label == _UPB_LABEL_MAP) {
-      encode_map(e, msg + f->offset, m, f);
+      CHK(upb_encode_map(e, msg + f->offset, m, f));
     } else {
-      encode_scalarfield(e, msg, m, f);
+      bool skip_empty = false;
+      if (f->presence == 0) {
+        /* Proto3 presence. */
+        skip_empty = true;
+      } else if (f->presence > 0) {
+        /* Proto2 presence: hasbit. */
+        if (!_upb_hasbit_field(msg, f)) {
+          continue;
+        }
+      } else {
+        /* Field is in a oneof. */
+        if (_upb_getoneofcase_field(msg, f) != f->number) {
+          continue;
+        }
+      }
+      CHK(upb_encode_scalarfield(e, msg + f->offset, m, f, skip_empty));
     }
   }
 
   *size = (e->limit - e->ptr) - pre_len;
+  return true;
 }
 
 char *upb_encode(const void *msg, const upb_msglayout *m, upb_arena *arena,
@@ -1219,13 +1190,11 @@ char *upb_encode(const void *msg, const upb_msglayout *m, upb_arena *arena,
   e.limit = NULL;
   e.ptr = NULL;
 
-  if (setjmp(e.err)) {
+  if (!upb_encode_message(&e, msg, m, size)) {
     *size = 0;
     return NULL;
   }
 
-  encode_message(&e, msg, m, size);
-
   *size = e.limit - e.ptr;
 
   if (*size == 0) {
@@ -1237,6 +1206,8 @@ char *upb_encode(const void *msg, const upb_msglayout *m, upb_arena *arena,
   }
 }
 
+#undef CHK
+
 
 
 
@@ -3092,7 +3063,7 @@ extern const upb_msglayout google_protobuf_SourceCodeInfo_Location_msginit;
 extern const upb_msglayout google_protobuf_GeneratedCodeInfo_msginit;
 extern const upb_msglayout google_protobuf_GeneratedCodeInfo_Annotation_msginit;
 
-static const upb_msglayout *google_protobuf_descriptor_proto_layouts[27] = {
+static const upb_msglayout *layouts[27] = {
   &google_protobuf_FileDescriptorSet_msginit,
   &google_protobuf_FileDescriptorProto_msginit,
   &google_protobuf_DescriptorProto_msginit,
@@ -3122,8 +3093,7 @@ static const upb_msglayout *google_protobuf_descriptor_proto_layouts[27] = {
   &google_protobuf_GeneratedCodeInfo_Annotation_msginit,
 };
 
-static const char google_protobuf_descriptor_proto_descriptor[7619] = {
-'\n', ' ', 'g', 'o', 'o', 'g', 'l', 'e', '/', 'p', 'r', 'o', 't', 'o', 'b', 'u', 'f', '/', 'd', 'e', 's', 'c', 'r', 'i', 'p', 
+static const char descriptor[7619] = {'\n', ' ', 'g', 'o', 'o', 'g', 'l', 'e', '/', 'p', 'r', 'o', 't', 'o', 'b', 'u', 'f', '/', 'd', 'e', 's', 'c', 'r', 'i', 'p', 
 't', 'o', 'r', '.', 'p', 'r', 'o', 't', 'o', '\022', '\017', 'g', 'o', 'o', 'g', 'l', 'e', '.', 'p', 'r', 'o', 't', 'o', 'b', 'u', 
 'f', '\"', 'M', '\n', '\021', 'F', 'i', 'l', 'e', 'D', 'e', 's', 'c', 'r', 'i', 'p', 't', 'o', 'r', 'S', 'e', 't', '\022', '8', '\n', 
 '\004', 'f', 'i', 'l', 'e', '\030', '\001', ' ', '\003', '(', '\013', '2', '$', '.', 'g', 'o', 'o', 'g', 'l', 'e', '.', 'p', 'r', 'o', 't', 
@@ -3430,15 +3400,15 @@ static const char google_protobuf_descriptor_proto_descriptor[7619] = {
 'P', 'r', 'o', 't', 'o', 'b', 'u', 'f', '.', 'R', 'e', 'f', 'l', 'e', 'c', 't', 'i', 'o', 'n', 
 };
 
-static upb_def_init *google_protobuf_descriptor_proto_deps[1] = {
+static upb_def_init *deps[1] = {
   NULL
 };
 
-upb_def_init google_protobuf_descriptor_proto_definit = {
-  google_protobuf_descriptor_proto_deps,
-  google_protobuf_descriptor_proto_layouts,
+upb_def_init google_protobuf_descriptor_proto_upbdefinit = {
+  deps,
+  layouts,
   "google/protobuf/descriptor.proto",
-  UPB_STRVIEW_INIT(google_protobuf_descriptor_proto_descriptor, 7619)
+  UPB_STRVIEW_INIT(descriptor, 7619)
 };
 
 
@@ -5696,7 +5666,11 @@ const upb_fielddef *upb_msg_whichoneof(const upb_msg *msg,
   if (upb_oneof_done(&i)) return false;
   f = upb_oneof_iter_field(&i);
   field = upb_fielddef_layout(f);
-  oneof_case = _upb_getoneofcase_field(msg, field);
+  if (in_oneof(field)) {
+    oneof_case = _upb_getoneofcase_field(msg, field);
+  } else {
+    return _upb_hasbit_field(msg, field) ? f : NULL;
+  }
 
   return oneof_case ? upb_msgdef_itof(m, oneof_case) : NULL;
 }

+ 33 - 63
php/ext/google/protobuf/php-upb.h

@@ -80,10 +80,6 @@
 #define UPB_FORCEINLINE __inline__ __attribute__((always_inline))
 #define UPB_NOINLINE __attribute__((noinline))
 #define UPB_NORETURN __attribute__((__noreturn__))
-#elif defined(_MSC_VER)
-#define UPB_NOINLINE
-#define UPB_FORCEINLINE
-#define UPB_NORETURN __declspec(noreturn)
 #else  /* !defined(__GNUC__) */
 #define UPB_FORCEINLINE
 #define UPB_NOINLINE
@@ -146,7 +142,7 @@ int msvc_vsnprintf(char* s, size_t n, const char* format, va_list arg);
 #elif defined _MSC_VER
 #define UPB_ASSUME(expr) if (!(expr)) __assume(0)
 #else
-#define UPB_ASSUME(expr) do {} while (false && (expr))
+#define UPB_ASSUME(expr) do {} if (false && (expr))
 #endif
 #else
 #define UPB_ASSUME(expr) assert(expr)
@@ -217,7 +213,7 @@ int msvc_vsnprintf(char* s, size_t n, const char* format, va_list arg);
 ** store pointers or integers of at least 32 bits (upb isn't really useful on
 ** systems where sizeof(void*) < 4).
 **
-** The table must be homogenous (all values of the same type).  In debug
+** The table must be homogeneous (all values of the same type).  In debug
 ** mode, we check this on insert and lookup.
 */
 
@@ -500,32 +496,6 @@ typedef enum {
 
 #define UPB_MAP_BEGIN ((size_t)-1)
 
-UPB_INLINE bool _upb_isle(void) {
-  int x = 1;
-  return *(char*)&x == 1;
-}
-
-UPB_INLINE uint32_t _upb_be_swap32(uint32_t val) {
-  if (_upb_isle()) {
-    return val;
-  } else {
-    return ((val & 0xff) << 24) | ((val & 0xff00) << 8) |
-           ((val & 0xff0000ULL) >> 8) | ((val & 0xff000000ULL) >> 24);
-  }
-}
-
-UPB_INLINE uint64_t _upb_be_swap64(uint64_t val) {
-  if (_upb_isle()) {
-    return val;
-  } else {
-    return ((val & 0xff) << 56) | ((val & 0xff00) << 40) |
-           ((val & 0xff0000) << 24) | ((val & 0xff000000) << 8) |
-           ((val & 0xff00000000ULL) >> 8) | ((val & 0xff0000000000ULL) >> 24) |
-           ((val & 0xff000000000000ULL) >> 40) |
-           ((val & 0xff00000000000000ULL) >> 56);
-  }
-}
-
 
 #ifdef __cplusplus
 }  /* extern "C" */
@@ -1281,7 +1251,7 @@ UPB_INLINE upb_value _upb_map_tovalue(const void *val, size_t size,
   if (size == UPB_MAPTYPE_STRING) {
     upb_strview *strp = (upb_strview*)upb_arena_malloc(a, sizeof(*strp));
     *strp = *(upb_strview*)val;
-    ret = upb_value_ptr(strp);
+    memcpy(&ret, &strp, sizeof(strp));
   } else {
     memcpy(&ret, val, size);
   }
@@ -1412,7 +1382,7 @@ UPB_INLINE void _upb_msg_map_set_value(void* msg, const void* val, size_t size)
   /* This is like _upb_map_tovalue() except the entry already exists so we can
    * reuse the allocated upb_strview for string fields. */
   if (size == UPB_MAPTYPE_STRING) {
-    upb_strview *strp = (upb_strview*)(uintptr_t)ent->val.val;
+    upb_strview *strp = (upb_strview*)ent->val.val;
     memcpy(strp, val, sizeof(*strp));
   } else {
     memcpy(&ent->val.val, val, size);
@@ -3519,140 +3489,140 @@ extern "C" {
 
 
 
-extern upb_def_init google_protobuf_descriptor_proto_definit;
+extern upb_def_init google_protobuf_descriptor_proto_upbdefinit;
 
 UPB_INLINE const upb_msgdef *google_protobuf_FileDescriptorSet_getmsgdef(upb_symtab *s) {
-  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_definit);
+  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_upbdefinit);
   return upb_symtab_lookupmsg(s, "google.protobuf.FileDescriptorSet");
 }
 
 UPB_INLINE const upb_msgdef *google_protobuf_FileDescriptorProto_getmsgdef(upb_symtab *s) {
-  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_definit);
+  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_upbdefinit);
   return upb_symtab_lookupmsg(s, "google.protobuf.FileDescriptorProto");
 }
 
 UPB_INLINE const upb_msgdef *google_protobuf_DescriptorProto_getmsgdef(upb_symtab *s) {
-  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_definit);
+  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_upbdefinit);
   return upb_symtab_lookupmsg(s, "google.protobuf.DescriptorProto");
 }
 
 UPB_INLINE const upb_msgdef *google_protobuf_DescriptorProto_ExtensionRange_getmsgdef(upb_symtab *s) {
-  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_definit);
+  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_upbdefinit);
   return upb_symtab_lookupmsg(s, "google.protobuf.DescriptorProto.ExtensionRange");
 }
 
 UPB_INLINE const upb_msgdef *google_protobuf_DescriptorProto_ReservedRange_getmsgdef(upb_symtab *s) {
-  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_definit);
+  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_upbdefinit);
   return upb_symtab_lookupmsg(s, "google.protobuf.DescriptorProto.ReservedRange");
 }
 
 UPB_INLINE const upb_msgdef *google_protobuf_ExtensionRangeOptions_getmsgdef(upb_symtab *s) {
-  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_definit);
+  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_upbdefinit);
   return upb_symtab_lookupmsg(s, "google.protobuf.ExtensionRangeOptions");
 }
 
 UPB_INLINE const upb_msgdef *google_protobuf_FieldDescriptorProto_getmsgdef(upb_symtab *s) {
-  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_definit);
+  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_upbdefinit);
   return upb_symtab_lookupmsg(s, "google.protobuf.FieldDescriptorProto");
 }
 
 UPB_INLINE const upb_msgdef *google_protobuf_OneofDescriptorProto_getmsgdef(upb_symtab *s) {
-  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_definit);
+  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_upbdefinit);
   return upb_symtab_lookupmsg(s, "google.protobuf.OneofDescriptorProto");
 }
 
 UPB_INLINE const upb_msgdef *google_protobuf_EnumDescriptorProto_getmsgdef(upb_symtab *s) {
-  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_definit);
+  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_upbdefinit);
   return upb_symtab_lookupmsg(s, "google.protobuf.EnumDescriptorProto");
 }
 
 UPB_INLINE const upb_msgdef *google_protobuf_EnumDescriptorProto_EnumReservedRange_getmsgdef(upb_symtab *s) {
-  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_definit);
+  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_upbdefinit);
   return upb_symtab_lookupmsg(s, "google.protobuf.EnumDescriptorProto.EnumReservedRange");
 }
 
 UPB_INLINE const upb_msgdef *google_protobuf_EnumValueDescriptorProto_getmsgdef(upb_symtab *s) {
-  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_definit);
+  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_upbdefinit);
   return upb_symtab_lookupmsg(s, "google.protobuf.EnumValueDescriptorProto");
 }
 
 UPB_INLINE const upb_msgdef *google_protobuf_ServiceDescriptorProto_getmsgdef(upb_symtab *s) {
-  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_definit);
+  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_upbdefinit);
   return upb_symtab_lookupmsg(s, "google.protobuf.ServiceDescriptorProto");
 }
 
 UPB_INLINE const upb_msgdef *google_protobuf_MethodDescriptorProto_getmsgdef(upb_symtab *s) {
-  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_definit);
+  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_upbdefinit);
   return upb_symtab_lookupmsg(s, "google.protobuf.MethodDescriptorProto");
 }
 
 UPB_INLINE const upb_msgdef *google_protobuf_FileOptions_getmsgdef(upb_symtab *s) {
-  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_definit);
+  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_upbdefinit);
   return upb_symtab_lookupmsg(s, "google.protobuf.FileOptions");
 }
 
 UPB_INLINE const upb_msgdef *google_protobuf_MessageOptions_getmsgdef(upb_symtab *s) {
-  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_definit);
+  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_upbdefinit);
   return upb_symtab_lookupmsg(s, "google.protobuf.MessageOptions");
 }
 
 UPB_INLINE const upb_msgdef *google_protobuf_FieldOptions_getmsgdef(upb_symtab *s) {
-  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_definit);
+  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_upbdefinit);
   return upb_symtab_lookupmsg(s, "google.protobuf.FieldOptions");
 }
 
 UPB_INLINE const upb_msgdef *google_protobuf_OneofOptions_getmsgdef(upb_symtab *s) {
-  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_definit);
+  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_upbdefinit);
   return upb_symtab_lookupmsg(s, "google.protobuf.OneofOptions");
 }
 
 UPB_INLINE const upb_msgdef *google_protobuf_EnumOptions_getmsgdef(upb_symtab *s) {
-  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_definit);
+  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_upbdefinit);
   return upb_symtab_lookupmsg(s, "google.protobuf.EnumOptions");
 }
 
 UPB_INLINE const upb_msgdef *google_protobuf_EnumValueOptions_getmsgdef(upb_symtab *s) {
-  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_definit);
+  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_upbdefinit);
   return upb_symtab_lookupmsg(s, "google.protobuf.EnumValueOptions");
 }
 
 UPB_INLINE const upb_msgdef *google_protobuf_ServiceOptions_getmsgdef(upb_symtab *s) {
-  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_definit);
+  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_upbdefinit);
   return upb_symtab_lookupmsg(s, "google.protobuf.ServiceOptions");
 }
 
 UPB_INLINE const upb_msgdef *google_protobuf_MethodOptions_getmsgdef(upb_symtab *s) {
-  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_definit);
+  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_upbdefinit);
   return upb_symtab_lookupmsg(s, "google.protobuf.MethodOptions");
 }
 
 UPB_INLINE const upb_msgdef *google_protobuf_UninterpretedOption_getmsgdef(upb_symtab *s) {
-  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_definit);
+  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_upbdefinit);
   return upb_symtab_lookupmsg(s, "google.protobuf.UninterpretedOption");
 }
 
 UPB_INLINE const upb_msgdef *google_protobuf_UninterpretedOption_NamePart_getmsgdef(upb_symtab *s) {
-  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_definit);
+  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_upbdefinit);
   return upb_symtab_lookupmsg(s, "google.protobuf.UninterpretedOption.NamePart");
 }
 
 UPB_INLINE const upb_msgdef *google_protobuf_SourceCodeInfo_getmsgdef(upb_symtab *s) {
-  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_definit);
+  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_upbdefinit);
   return upb_symtab_lookupmsg(s, "google.protobuf.SourceCodeInfo");
 }
 
 UPB_INLINE const upb_msgdef *google_protobuf_SourceCodeInfo_Location_getmsgdef(upb_symtab *s) {
-  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_definit);
+  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_upbdefinit);
   return upb_symtab_lookupmsg(s, "google.protobuf.SourceCodeInfo.Location");
 }
 
 UPB_INLINE const upb_msgdef *google_protobuf_GeneratedCodeInfo_getmsgdef(upb_symtab *s) {
-  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_definit);
+  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_upbdefinit);
   return upb_symtab_lookupmsg(s, "google.protobuf.GeneratedCodeInfo");
 }
 
 UPB_INLINE const upb_msgdef *google_protobuf_GeneratedCodeInfo_Annotation_getmsgdef(upb_symtab *s) {
-  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_definit);
+  _upb_symtab_loaddefinit(s, &google_protobuf_descriptor_proto_upbdefinit);
   return upb_symtab_lookupmsg(s, "google.protobuf.GeneratedCodeInfo.Annotation");
 }
 
@@ -3857,7 +3827,7 @@ extern "C" {
 #endif
 
 enum {
-  /* When set, emits 0/default values.  TOOD(haberman): proto3 only? */
+  /* When set, emits 0/default values.  TODO(haberman): proto3 only? */
   UPB_JSONENC_EMITDEFAULTS = 1,
 
   /* When set, use normal (snake_case) field names instead of JSON (camelCase)