encode.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478
  1. /* We encode backwards, to avoid pre-computing lengths (one-pass encode). */
  2. #include "upb/encode.h"
  3. #include <setjmp.h>
  4. #include <string.h>
  5. #include "upb/msg.h"
  6. #include "upb/upb.h"
  7. /* Must be last. */
  8. #include "upb/port_def.inc"
  9. #define UPB_PB_VARINT_MAX_LEN 10
  10. UPB_NOINLINE
  /* Writes "val" to "buf" as a base-128 varint, least-significant group first.
   * Every byte except the last has its high bit set.  Returns the number of
   * bytes written (at least 1).  The caller must provide room for the longest
   * possible encoding of a 64-bit value. */
  static size_t encode_varint64(uint64_t val, char *buf) {
    size_t count = 0;
    for (;;) {
      uint8_t group = val & 0x7fU;
      val >>= 7;
      if (val == 0) {
        /* Final group: continuation bit stays clear. */
        buf[count++] = group;
        return count;
      }
      buf[count++] = group | 0x80U;
    }
  }
  /* ZigZag-maps a signed 32-bit value to unsigned so that values of small
   * magnitude (positive or negative) become small numbers:
   * 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ... */
  static uint32_t encode_zz32(int32_t n) {
    uint32_t doubled = (uint32_t)n << 1;
    int32_t sign_fill = n >> 31; /* all ones when n is negative, else zero */
    return doubled ^ sign_fill;
  }
  /* ZigZag-maps a signed 64-bit value to unsigned so that values of small
   * magnitude (positive or negative) become small numbers:
   * 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ... */
  static uint64_t encode_zz64(int64_t n) {
    uint64_t doubled = (uint64_t)n << 1;
    int64_t sign_fill = n >> 63; /* all ones when n is negative, else zero */
    return doubled ^ sign_fill;
  }
  23. typedef struct {
  24. jmp_buf err;
  25. upb_alloc *alloc;
  26. char *buf, *ptr, *limit;
  27. int options;
  28. int depth;
  29. _upb_mapsorter sorter;
  30. } upb_encstate;
  /* Returns the smallest power of two that is >= "bytes", with a floor of 128.
   *
   * If no such power of two is representable in size_t (i.e. "bytes" exceeds
   * the largest power of two), "bytes" itself is returned.  The previous
   * implementation doubled past the top bit, wrapped to 0 and then looped
   * forever. */
  static size_t upb_roundup_pow2(size_t bytes) {
    /* Largest power of two that fits in size_t. */
    const size_t max_pow2 = (((size_t)-1) >> 1) + 1;
    size_t ret = 128;
    while (ret < bytes) {
      if (ret >= max_pow2) {
        /* Doubling again would overflow; fall back to the exact request. */
        return bytes;
      }
      ret *= 2;
    }
    return ret;
  }
  38. UPB_NORETURN static void encode_err(upb_encstate *e) {
  39. UPB_LONGJMP(e->err, 1);
  40. }
  41. UPB_NOINLINE
  42. static void encode_growbuffer(upb_encstate *e, size_t bytes) {
  43. size_t old_size = e->limit - e->buf;
  44. size_t new_size = upb_roundup_pow2(bytes + (e->limit - e->ptr));
  45. char *new_buf = upb_realloc(e->alloc, e->buf, old_size, new_size);
  46. if (!new_buf) encode_err(e);
  47. /* We want previous data at the end, realloc() put it at the beginning. */
  48. if (old_size > 0) {
  49. memmove(new_buf + new_size - old_size, e->buf, old_size);
  50. }
  51. e->ptr = new_buf + new_size - (e->limit - e->ptr);
  52. e->limit = new_buf + new_size;
  53. e->buf = new_buf;
  54. e->ptr -= bytes;
  55. }
  56. /* Call to ensure that at least "bytes" bytes are available for writing at
  57. * e->ptr. Returns false if the bytes could not be allocated. */
  58. UPB_FORCEINLINE
  59. static void encode_reserve(upb_encstate *e, size_t bytes) {
  60. if ((size_t)(e->ptr - e->buf) < bytes) {
  61. encode_growbuffer(e, bytes);
  62. return;
  63. }
  64. e->ptr -= bytes;
  65. }
  66. /* Writes the given bytes to the buffer, handling reserve/advance. */
  67. static void encode_bytes(upb_encstate *e, const void *data, size_t len) {
  68. if (len == 0) return; /* memcpy() with zero size is UB */
  69. encode_reserve(e, len);
  70. memcpy(e->ptr, data, len);
  71. }
  72. static void encode_fixed64(upb_encstate *e, uint64_t val) {
  73. val = _upb_be_swap64(val);
  74. encode_bytes(e, &val, sizeof(uint64_t));
  75. }
  76. static void encode_fixed32(upb_encstate *e, uint32_t val) {
  77. val = _upb_be_swap32(val);
  78. encode_bytes(e, &val, sizeof(uint32_t));
  79. }
  80. UPB_NOINLINE
  81. static void encode_longvarint(upb_encstate *e, uint64_t val) {
  82. size_t len;
  83. char *start;
  84. encode_reserve(e, UPB_PB_VARINT_MAX_LEN);
  85. len = encode_varint64(val, e->ptr);
  86. start = e->ptr + UPB_PB_VARINT_MAX_LEN - len;
  87. memmove(start, e->ptr, len);
  88. e->ptr = start;
  89. }
  90. UPB_FORCEINLINE
  91. static void encode_varint(upb_encstate *e, uint64_t val) {
  92. if (val < 128 && e->ptr != e->buf) {
  93. --e->ptr;
  94. *e->ptr = val;
  95. } else {
  96. encode_longvarint(e, val);
  97. }
  98. }
  99. static void encode_double(upb_encstate *e, double d) {
  100. uint64_t u64;
  101. UPB_ASSERT(sizeof(double) == sizeof(uint64_t));
  102. memcpy(&u64, &d, sizeof(uint64_t));
  103. encode_fixed64(e, u64);
  104. }
  105. static void encode_float(upb_encstate *e, float d) {
  106. uint32_t u32;
  107. UPB_ASSERT(sizeof(float) == sizeof(uint32_t));
  108. memcpy(&u32, &d, sizeof(uint32_t));
  109. encode_fixed32(e, u32);
  110. }
  111. static void encode_tag(upb_encstate *e, uint32_t field_number,
  112. uint8_t wire_type) {
  113. encode_varint(e, (field_number << 3) | wire_type);
  114. }
  115. static void encode_fixedarray(upb_encstate *e, const upb_array *arr,
  116. size_t elem_size, uint32_t tag) {
  117. size_t bytes = arr->len * elem_size;
  118. const char* data = _upb_array_constptr(arr);
  119. const char* ptr = data + bytes - elem_size;
  120. if (tag) {
  121. while (true) {
  122. encode_bytes(e, ptr, elem_size);
  123. encode_varint(e, tag);
  124. if (ptr == data) break;
  125. ptr -= elem_size;
  126. }
  127. } else {
  128. encode_bytes(e, data, bytes);
  129. }
  130. }
  131. static void encode_message(upb_encstate *e, const char *msg,
  132. const upb_msglayout *m, size_t *size);
  133. static void encode_scalar(upb_encstate *e, const void *_field_mem,
  134. const upb_msglayout *m, const upb_msglayout_field *f,
  135. bool skip_zero_value) {
  136. const char *field_mem = _field_mem;
  137. int wire_type;
  138. #define CASE(ctype, type, wtype, encodeval) \
  139. { \
  140. ctype val = *(ctype *)field_mem; \
  141. if (skip_zero_value && val == 0) { \
  142. return; \
  143. } \
  144. encode_##type(e, encodeval); \
  145. wire_type = wtype; \
  146. break; \
  147. }
  148. switch (f->descriptortype) {
  149. case UPB_DESCRIPTOR_TYPE_DOUBLE:
  150. CASE(double, double, UPB_WIRE_TYPE_64BIT, val);
  151. case UPB_DESCRIPTOR_TYPE_FLOAT:
  152. CASE(float, float, UPB_WIRE_TYPE_32BIT, val);
  153. case UPB_DESCRIPTOR_TYPE_INT64:
  154. case UPB_DESCRIPTOR_TYPE_UINT64:
  155. CASE(uint64_t, varint, UPB_WIRE_TYPE_VARINT, val);
  156. case UPB_DESCRIPTOR_TYPE_UINT32:
  157. CASE(uint32_t, varint, UPB_WIRE_TYPE_VARINT, val);
  158. case UPB_DESCRIPTOR_TYPE_INT32:
  159. case UPB_DESCRIPTOR_TYPE_ENUM:
  160. CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, (int64_t)val);
  161. case UPB_DESCRIPTOR_TYPE_SFIXED64:
  162. case UPB_DESCRIPTOR_TYPE_FIXED64:
  163. CASE(uint64_t, fixed64, UPB_WIRE_TYPE_64BIT, val);
  164. case UPB_DESCRIPTOR_TYPE_FIXED32:
  165. case UPB_DESCRIPTOR_TYPE_SFIXED32:
  166. CASE(uint32_t, fixed32, UPB_WIRE_TYPE_32BIT, val);
  167. case UPB_DESCRIPTOR_TYPE_BOOL:
  168. CASE(bool, varint, UPB_WIRE_TYPE_VARINT, val);
  169. case UPB_DESCRIPTOR_TYPE_SINT32:
  170. CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, encode_zz32(val));
  171. case UPB_DESCRIPTOR_TYPE_SINT64:
  172. CASE(int64_t, varint, UPB_WIRE_TYPE_VARINT, encode_zz64(val));
  173. case UPB_DESCRIPTOR_TYPE_STRING:
  174. case UPB_DESCRIPTOR_TYPE_BYTES: {
  175. upb_strview view = *(upb_strview*)field_mem;
  176. if (skip_zero_value && view.size == 0) {
  177. return;
  178. }
  179. encode_bytes(e, view.data, view.size);
  180. encode_varint(e, view.size);
  181. wire_type = UPB_WIRE_TYPE_DELIMITED;
  182. break;
  183. }
  184. case UPB_DESCRIPTOR_TYPE_GROUP: {
  185. size_t size;
  186. void *submsg = *(void **)field_mem;
  187. const upb_msglayout *subm = m->submsgs[f->submsg_index];
  188. if (submsg == NULL) {
  189. return;
  190. }
  191. if (--e->depth == 0) encode_err(e);
  192. encode_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP);
  193. encode_message(e, submsg, subm, &size);
  194. wire_type = UPB_WIRE_TYPE_START_GROUP;
  195. e->depth++;
  196. break;
  197. }
  198. case UPB_DESCRIPTOR_TYPE_MESSAGE: {
  199. size_t size;
  200. void *submsg = *(void **)field_mem;
  201. const upb_msglayout *subm = m->submsgs[f->submsg_index];
  202. if (submsg == NULL) {
  203. return;
  204. }
  205. if (--e->depth == 0) encode_err(e);
  206. encode_message(e, submsg, subm, &size);
  207. encode_varint(e, size);
  208. wire_type = UPB_WIRE_TYPE_DELIMITED;
  209. e->depth++;
  210. break;
  211. }
  212. default:
  213. UPB_UNREACHABLE();
  214. }
  215. #undef CASE
  216. encode_tag(e, f->number, wire_type);
  217. }
  218. static void encode_array(upb_encstate *e, const char *field_mem,
  219. const upb_msglayout *m, const upb_msglayout_field *f) {
  220. const upb_array *arr = *(const upb_array**)field_mem;
  221. bool packed = f->label == _UPB_LABEL_PACKED;
  222. size_t pre_len = e->limit - e->ptr;
  223. if (arr == NULL || arr->len == 0) {
  224. return;
  225. }
  226. #define VARINT_CASE(ctype, encode) \
  227. { \
  228. const ctype *start = _upb_array_constptr(arr); \
  229. const ctype *ptr = start + arr->len; \
  230. uint32_t tag = packed ? 0 : (f->number << 3) | UPB_WIRE_TYPE_VARINT; \
  231. do { \
  232. ptr--; \
  233. encode_varint(e, encode); \
  234. if (tag) encode_varint(e, tag); \
  235. } while (ptr != start); \
  236. } \
  237. break;
  238. #define TAG(wire_type) (packed ? 0 : (f->number << 3 | wire_type))
  239. switch (f->descriptortype) {
  240. case UPB_DESCRIPTOR_TYPE_DOUBLE:
  241. encode_fixedarray(e, arr, sizeof(double), TAG(UPB_WIRE_TYPE_64BIT));
  242. break;
  243. case UPB_DESCRIPTOR_TYPE_FLOAT:
  244. encode_fixedarray(e, arr, sizeof(float), TAG(UPB_WIRE_TYPE_32BIT));
  245. break;
  246. case UPB_DESCRIPTOR_TYPE_SFIXED64:
  247. case UPB_DESCRIPTOR_TYPE_FIXED64:
  248. encode_fixedarray(e, arr, sizeof(uint64_t), TAG(UPB_WIRE_TYPE_64BIT));
  249. break;
  250. case UPB_DESCRIPTOR_TYPE_FIXED32:
  251. case UPB_DESCRIPTOR_TYPE_SFIXED32:
  252. encode_fixedarray(e, arr, sizeof(uint32_t), TAG(UPB_WIRE_TYPE_32BIT));
  253. break;
  254. case UPB_DESCRIPTOR_TYPE_INT64:
  255. case UPB_DESCRIPTOR_TYPE_UINT64:
  256. VARINT_CASE(uint64_t, *ptr);
  257. case UPB_DESCRIPTOR_TYPE_UINT32:
  258. VARINT_CASE(uint32_t, *ptr);
  259. case UPB_DESCRIPTOR_TYPE_INT32:
  260. case UPB_DESCRIPTOR_TYPE_ENUM:
  261. VARINT_CASE(int32_t, (int64_t)*ptr);
  262. case UPB_DESCRIPTOR_TYPE_BOOL:
  263. VARINT_CASE(bool, *ptr);
  264. case UPB_DESCRIPTOR_TYPE_SINT32:
  265. VARINT_CASE(int32_t, encode_zz32(*ptr));
  266. case UPB_DESCRIPTOR_TYPE_SINT64:
  267. VARINT_CASE(int64_t, encode_zz64(*ptr));
  268. case UPB_DESCRIPTOR_TYPE_STRING:
  269. case UPB_DESCRIPTOR_TYPE_BYTES: {
  270. const upb_strview *start = _upb_array_constptr(arr);
  271. const upb_strview *ptr = start + arr->len;
  272. do {
  273. ptr--;
  274. encode_bytes(e, ptr->data, ptr->size);
  275. encode_varint(e, ptr->size);
  276. encode_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
  277. } while (ptr != start);
  278. return;
  279. }
  280. case UPB_DESCRIPTOR_TYPE_GROUP: {
  281. const void *const*start = _upb_array_constptr(arr);
  282. const void *const*ptr = start + arr->len;
  283. const upb_msglayout *subm = m->submsgs[f->submsg_index];
  284. if (--e->depth == 0) encode_err(e);
  285. do {
  286. size_t size;
  287. ptr--;
  288. encode_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP);
  289. encode_message(e, *ptr, subm, &size);
  290. encode_tag(e, f->number, UPB_WIRE_TYPE_START_GROUP);
  291. } while (ptr != start);
  292. e->depth++;
  293. return;
  294. }
  295. case UPB_DESCRIPTOR_TYPE_MESSAGE: {
  296. const void *const*start = _upb_array_constptr(arr);
  297. const void *const*ptr = start + arr->len;
  298. const upb_msglayout *subm = m->submsgs[f->submsg_index];
  299. if (--e->depth == 0) encode_err(e);
  300. do {
  301. size_t size;
  302. ptr--;
  303. encode_message(e, *ptr, subm, &size);
  304. encode_varint(e, size);
  305. encode_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
  306. } while (ptr != start);
  307. e->depth++;
  308. return;
  309. }
  310. }
  311. #undef VARINT_CASE
  312. if (packed) {
  313. encode_varint(e, e->limit - e->ptr - pre_len);
  314. encode_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
  315. }
  316. }
  317. static void encode_mapentry(upb_encstate *e, uint32_t number,
  318. const upb_msglayout *layout,
  319. const upb_map_entry *ent) {
  320. const upb_msglayout_field *key_field = &layout->fields[0];
  321. const upb_msglayout_field *val_field = &layout->fields[1];
  322. size_t pre_len = e->limit - e->ptr;
  323. size_t size;
  324. encode_scalar(e, &ent->v, layout, val_field, false);
  325. encode_scalar(e, &ent->k, layout, key_field, false);
  326. size = (e->limit - e->ptr) - pre_len;
  327. encode_varint(e, size);
  328. encode_tag(e, number, UPB_WIRE_TYPE_DELIMITED);
  329. }
  330. static void encode_map(upb_encstate *e, const char *field_mem,
  331. const upb_msglayout *m, const upb_msglayout_field *f) {
  332. const upb_map *map = *(const upb_map**)field_mem;
  333. const upb_msglayout *layout = m->submsgs[f->submsg_index];
  334. UPB_ASSERT(layout->field_count == 2);
  335. if (map == NULL) return;
  336. if (e->options & UPB_ENCODE_DETERMINISTIC) {
  337. _upb_sortedmap sorted;
  338. _upb_mapsorter_pushmap(&e->sorter, layout->fields[0].descriptortype, map,
  339. &sorted);
  340. upb_map_entry ent;
  341. while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) {
  342. encode_mapentry(e, f->number, layout, &ent);
  343. }
  344. _upb_mapsorter_popmap(&e->sorter, &sorted);
  345. } else {
  346. upb_strtable_iter i;
  347. upb_strtable_begin(&i, &map->table);
  348. for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
  349. upb_strview key = upb_strtable_iter_key(&i);
  350. const upb_value val = upb_strtable_iter_value(&i);
  351. upb_map_entry ent;
  352. _upb_map_fromkey(key, &ent.k, map->key_size);
  353. _upb_map_fromvalue(val, &ent.v, map->val_size);
  354. encode_mapentry(e, f->number, layout, &ent);
  355. }
  356. }
  357. }
  358. static void encode_scalarfield(upb_encstate *e, const char *msg,
  359. const upb_msglayout *m,
  360. const upb_msglayout_field *f) {
  361. bool skip_empty = false;
  362. if (f->presence == 0) {
  363. /* Proto3 presence. */
  364. skip_empty = true;
  365. } else if (f->presence > 0) {
  366. /* Proto2 presence: hasbit. */
  367. if (!_upb_hasbit_field(msg, f)) return;
  368. } else {
  369. /* Field is in a oneof. */
  370. if (_upb_getoneofcase_field(msg, f) != f->number) return;
  371. }
  372. encode_scalar(e, msg + f->offset, m, f, skip_empty);
  373. }
  374. static void encode_message(upb_encstate *e, const char *msg,
  375. const upb_msglayout *m, size_t *size) {
  376. size_t pre_len = e->limit - e->ptr;
  377. const upb_msglayout_field *f = &m->fields[m->field_count];
  378. const upb_msglayout_field *first = &m->fields[0];
  379. if ((e->options & UPB_ENCODE_SKIPUNKNOWN) == 0) {
  380. size_t unknown_size;
  381. const char *unknown = upb_msg_getunknown(msg, &unknown_size);
  382. if (unknown) {
  383. encode_bytes(e, unknown, unknown_size);
  384. }
  385. }
  386. while (f != first) {
  387. f--;
  388. if (_upb_isrepeated(f)) {
  389. encode_array(e, msg + f->offset, m, f);
  390. } else if (f->label == _UPB_LABEL_MAP) {
  391. encode_map(e, msg + f->offset, m, f);
  392. } else {
  393. encode_scalarfield(e, msg, m, f);
  394. }
  395. }
  396. *size = (e->limit - e->ptr) - pre_len;
  397. }
  398. char *upb_encode_ex(const void *msg, const upb_msglayout *m, int options,
  399. upb_arena *arena, size_t *size) {
  400. upb_encstate e;
  401. unsigned depth = (unsigned)options >> 16;
  402. e.alloc = upb_arena_alloc(arena);
  403. e.buf = NULL;
  404. e.limit = NULL;
  405. e.ptr = NULL;
  406. e.depth = depth ? depth : 64;
  407. e.options = options;
  408. _upb_mapsorter_init(&e.sorter);
  409. char *ret = NULL;
  410. if (UPB_SETJMP(e.err)) {
  411. *size = 0;
  412. ret = NULL;
  413. } else {
  414. encode_message(&e, msg, m, size);
  415. *size = e.limit - e.ptr;
  416. if (*size == 0) {
  417. static char ch;
  418. ret = &ch;
  419. } else {
  420. UPB_ASSERT(e.ptr);
  421. ret = e.ptr;
  422. }
  423. }
  424. _upb_mapsorter_destroy(&e.sorter);
  425. return ret;
  426. }