table.c 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889
  1. /*
  2. ** upb_table Implementation
  3. **
  4. ** Implementation is heavily inspired by Lua's ltable.c.
  5. */
  6. #include "upb/table.int.h"
  7. #include <string.h>
  8. #include "upb/port_def.inc"
/* Maximum log2 size of the array part of an inttable (2^16 = 64k slots). */
#define UPB_MAXARRSIZE 16 /* 64k. */

/* From Chromium. Compile-time element count; the second factor deliberately
 * divides by zero when x is a pointer rather than an actual array. */
#define ARRAY_SIZE(x) \
    ((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x])))))

/* Grow the hash part once (count + 1) / size exceeds this load factor. */
static const double MAX_LOAD = 0.85;

/* The minimum utilization of the array part of a mixed hash/array table. This
 * is a speed/memory-usage tradeoff (though it's not straightforward because of
 * cache effects). The lower this is, the more memory we'll use. */
static const double MIN_DENSITY = 0.1;
  18. bool is_pow2(uint64_t v) { return v == 0 || (v & (v - 1)) == 0; }
  19. int log2ceil(uint64_t v) {
  20. int ret = 0;
  21. bool pow2 = is_pow2(v);
  22. while (v >>= 1) ret++;
  23. ret = pow2 ? ret : ret + 1; /* Ceiling. */
  24. return UPB_MIN(UPB_MAXARRSIZE, ret);
  25. }
  26. char *upb_strdup(const char *s, upb_alloc *a) {
  27. return upb_strdup2(s, strlen(s), a);
  28. }
  29. char *upb_strdup2(const char *s, size_t len, upb_alloc *a) {
  30. size_t n;
  31. char *p;
  32. /* Prevent overflow errors. */
  33. if (len == SIZE_MAX) return NULL;
  34. /* Always null-terminate, even if binary data; but don't rely on the input to
  35. * have a null-terminating byte since it may be a raw binary buffer. */
  36. n = len + 1;
  37. p = upb_malloc(a, n);
  38. if (p) {
  39. memcpy(p, s, len);
  40. p[len] = 0;
  41. }
  42. return p;
  43. }
/* A type to represent the lookup key of either a strtable or an inttable. */
typedef union {
  uintptr_t num;       /* Integer key (inttable). */
  struct {
    const char *str;   /* String bytes; not necessarily NUL-terminated. */
    size_t len;        /* Length of str in bytes. */
  } str;
} lookupkey_t;

/* Wraps a string/length pair as a lookupkey_t. */
static lookupkey_t strkey2(const char *str, size_t len) {
  lookupkey_t k;
  k.str.str = str;
  k.str.len = len;
  return k;
}

/* Wraps an integer key as a lookupkey_t. */
static lookupkey_t intkey(uintptr_t key) {
  lookupkey_t k;
  k.num = key;
  return k;
}

/* Hash function over a stored table key. */
typedef uint32_t hashfunc_t(upb_tabkey key);
/* Equality between a stored table key and a lookup key. */
typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2);

/* Base table (shared code) ***************************************************/
/* For when we need to cast away const. */
static upb_tabent *mutable_entries(upb_table *t) {
  return (upb_tabent*)t->entries;
}

/* True if inserting one more entry would exceed MAX_LOAD. An empty (size 0)
 * table always reports full, which forces the first insert to allocate. */
static bool isfull(upb_table *t) {
  if (upb_table_size(t) == 0) {
    return true;
  } else {
    return ((double)(t->count + 1) / upb_table_size(t)) > MAX_LOAD;
  }
}
  77. static bool init(upb_table *t, uint8_t size_lg2, upb_alloc *a) {
  78. size_t bytes;
  79. t->count = 0;
  80. t->size_lg2 = size_lg2;
  81. t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0;
  82. bytes = upb_table_size(t) * sizeof(upb_tabent);
  83. if (bytes > 0) {
  84. t->entries = upb_malloc(a, bytes);
  85. if (!t->entries) return false;
  86. memset(mutable_entries(t), 0, bytes);
  87. } else {
  88. t->entries = NULL;
  89. }
  90. return true;
  91. }
/* Frees the hash part's entry array. */
static void uninit(upb_table *t, upb_alloc *a) {
  upb_free(a, mutable_entries(t));
}

/* Returns some empty entry, scanning backwards from the end of the table.
 * Must only be called when the table is known to have a free slot (the
 * caller checks isfull() first); otherwise the assert fires. */
static upb_tabent *emptyent(upb_table *t) {
  upb_tabent *e = mutable_entries(t) + upb_table_size(t);
  while (1) { if (upb_tabent_isempty(--e)) return e; UPB_ASSERT(e > t->entries); }
}

/* Mutable variant of upb_getentry(): the head bucket for this hash. */
static upb_tabent *getentry_mutable(upb_table *t, uint32_t hash) {
  return (upb_tabent*)upb_getentry(t, hash);
}
/* Walks the collision chain anchored at hash's bucket and returns the entry
 * matching key (per eql), or NULL if absent or the table has no hash part. */
static const upb_tabent *findentry(const upb_table *t, lookupkey_t key,
                                   uint32_t hash, eqlfunc_t *eql) {
  const upb_tabent *e;

  if (t->size_lg2 == 0) return NULL;
  e = upb_getentry(t, hash);
  if (upb_tabent_isempty(e)) return NULL;
  while (1) {
    if (eql(e->key, key)) return e;
    if ((e = e->next) == NULL) return NULL;
  }
}

/* Non-const wrapper around findentry(). */
static upb_tabent *findentry_mutable(upb_table *t, lookupkey_t key,
                                     uint32_t hash, eqlfunc_t *eql) {
  return (upb_tabent*)findentry(t, key, hash, eql);
}
  117. static bool lookup(const upb_table *t, lookupkey_t key, upb_value *v,
  118. uint32_t hash, eqlfunc_t *eql) {
  119. const upb_tabent *e = findentry(t, key, hash, eql);
  120. if (e) {
  121. if (v) {
  122. _upb_value_setval(v, e->val.val);
  123. }
  124. return true;
  125. } else {
  126. return false;
  127. }
  128. }
/* Inserts (tabkey, val) into the hash part. The given key must not already
 * exist in the table, and the table must have at least one free slot.
 * Collision resolution is chained scatter (Brent-style, as in Lua):
 * every entry either sits in its "main position" (the bucket its own hash
 * maps to) or is reachable from that bucket's chain. */
static void insert(upb_table *t, lookupkey_t key, upb_tabkey tabkey,
                   upb_value val, uint32_t hash,
                   hashfunc_t *hashfunc, eqlfunc_t *eql) {
  upb_tabent *mainpos_e;
  upb_tabent *our_e;

  UPB_ASSERT(findentry(t, key, hash, eql) == NULL);

  t->count++;
  mainpos_e = getentry_mutable(t, hash);
  our_e = mainpos_e;

  if (upb_tabent_isempty(mainpos_e)) {
    /* Our main position is empty; use it. */
    our_e->next = NULL;
  } else {
    /* Collision. */
    upb_tabent *new_e = emptyent(t);
    /* Head of collider's chain. */
    upb_tabent *chain = getentry_mutable(t, hashfunc(mainpos_e->key));
    if (chain == mainpos_e) {
      /* Existing ent is in its main position (it has the same hash as us, and
       * is the head of our chain). Insert to new ent and append to this chain. */
      new_e->next = mainpos_e->next;
      mainpos_e->next = new_e;
      our_e = new_e;
    } else {
      /* Existing ent is not in its main position (it is a node in some other
       * chain). This implies that no existing ent in the table has our hash.
       * Evict it (updating its chain) and use its ent for head of our chain. */
      *new_e = *mainpos_e; /* copies next. */
      /* Find the evictee's predecessor so its chain can be relinked. */
      while (chain->next != mainpos_e) {
        chain = (upb_tabent*)chain->next;
        UPB_ASSERT(chain);
      }
      chain->next = new_e;
      our_e = mainpos_e;
      our_e->next = NULL;
    }
  }
  our_e->key = tabkey;
  our_e->val.val = val.val;
  UPB_ASSERT(findentry(t, key, hash, eql) == our_e);
}
/* Removes the entry matching key from the chain anchored at hash's bucket.
 * On success stores the old value into *val and the stored key into *removed
 * (both optional) and returns true; returns false if the key is absent. */
static bool rm(upb_table *t, lookupkey_t key, upb_value *val,
               upb_tabkey *removed, uint32_t hash, eqlfunc_t *eql) {
  upb_tabent *chain = getentry_mutable(t, hash);
  if (upb_tabent_isempty(chain)) return false;
  if (eql(chain->key, key)) {
    /* Element to remove is at the head of its chain. */
    t->count--;
    if (val) _upb_value_setval(val, chain->val.val);
    if (removed) *removed = chain->key;
    if (chain->next) {
      /* Promote the second entry into the head slot, freeing its old slot. */
      upb_tabent *move = (upb_tabent*)chain->next;
      *chain = *move;
      move->key = 0;  /* Make the slot empty. */
    } else {
      chain->key = 0;  /* Make the slot empty. */
    }
    return true;
  } else {
    /* Element to remove is either in a non-head position or not in the
     * table. */
    while (chain->next && !eql(chain->next->key, key)) {
      chain = (upb_tabent*)chain->next;
    }
    if (chain->next) {
      /* Found element to remove: unlink it and clear its slot. */
      upb_tabent *rm = (upb_tabent*)chain->next;
      t->count--;
      if (val) _upb_value_setval(val, chain->next->val.val);
      if (removed) *removed = rm->key;
      rm->key = 0;  /* Make the slot empty. */
      chain->next = rm->next;
      return true;
    } else {
      /* Element to remove is not in the table. */
      return false;
    }
  }
}
/* Returns the index of the first occupied entry after i, or SIZE_MAX when
 * there are no more. */
static size_t next(const upb_table *t, size_t i) {
  do {
    if (++i >= upb_table_size(t))
      return SIZE_MAX;
  } while(upb_tabent_isempty(&t->entries[i]));

  return i;
}

/* Index of the first occupied entry (SIZE_MAX if the table is empty). The
 * -1 wraps to SIZE_MAX in the size_t parameter, so ++i in next() starts the
 * scan at index 0. */
static size_t begin(const upb_table *t) {
  return next(t, -1);
}
  219. /* upb_strtable ***************************************************************/
  220. /* A simple "subclass" of upb_table that only adds a hash function for strings. */
  221. static upb_tabkey strcopy(lookupkey_t k2, upb_alloc *a) {
  222. uint32_t len = (uint32_t) k2.str.len;
  223. char *str = upb_malloc(a, k2.str.len + sizeof(uint32_t) + 1);
  224. if (str == NULL) return 0;
  225. memcpy(str, &len, sizeof(uint32_t));
  226. memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len);
  227. str[sizeof(uint32_t) + k2.str.len] = '\0';
  228. return (uintptr_t)str;
  229. }
/* Hashes a stored (length-prefixed) string key with Murmur2. */
static uint32_t strhash(upb_tabkey key) {
  uint32_t len;
  char *str = upb_tabstr(key, &len);
  return upb_murmur_hash2(str, len, 0);
}

/* Compares a stored string key against a lookup key: length then bytes. */
static bool streql(upb_tabkey k1, lookupkey_t k2) {
  uint32_t len;
  char *str = upb_tabstr(k1, &len);
  return len == k2.str.len && memcmp(str, k2.str.str, len) == 0;
}
/* Initializes a strtable with a small hash part (2^2 buckets). The ctype
 * parameter is accepted for API compatibility but unused here. */
bool upb_strtable_init2(upb_strtable *t, upb_ctype_t ctype, upb_alloc *a) {
  return init(&t->t, 2, a);
}

/* Empties the table in place without freeing the copied string keys.
 * NOTE(review): with a non-arena allocator this leaks the key copies;
 * presumably intended for arena-backed tables — confirm with callers. */
void upb_strtable_clear(upb_strtable *t) {
  size_t bytes = upb_table_size(&t->t) * sizeof(upb_tabent);
  t->t.count = 0;
  memset((char*)t->t.entries, 0, bytes);
}
  248. void upb_strtable_uninit2(upb_strtable *t, upb_alloc *a) {
  249. size_t i;
  250. for (i = 0; i < upb_table_size(&t->t); i++)
  251. upb_free(a, (void*)t->t.entries[i].key);
  252. uninit(&t->t, a);
  253. }
  254. bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_alloc *a) {
  255. upb_strtable new_table;
  256. upb_strtable_iter i;
  257. if (!init(&new_table.t, size_lg2, a))
  258. return false;
  259. upb_strtable_begin(&i, t);
  260. for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) {
  261. upb_strview key = upb_strtable_iter_key(&i);
  262. upb_strtable_insert3(
  263. &new_table, key.data, key.size,
  264. upb_strtable_iter_value(&i), a);
  265. }
  266. upb_strtable_uninit2(t, a);
  267. *t = new_table;
  268. return true;
  269. }
/* Inserts a copy of the key bytes (k, len) with value v. The key must not
 * already be present. Returns false on OOM (resize or key copy). */
bool upb_strtable_insert3(upb_strtable *t, const char *k, size_t len,
                          upb_value v, upb_alloc *a) {
  lookupkey_t key;
  upb_tabkey tabkey;
  uint32_t hash;

  if (isfull(&t->t)) {
    /* Need to resize. New table of double the size, add old elements to it. */
    if (!upb_strtable_resize(t, t->t.size_lg2 + 1, a)) {
      return false;
    }
  }

  key = strkey2(k, len);
  tabkey = strcopy(key, a);
  if (tabkey == 0) return false;

  /* Same hash that strhash() computes for the stored copy, so lookups agree. */
  hash = upb_murmur_hash2(key.str.str, key.str.len, 0);
  insert(&t->t, key, tabkey, v, hash, &strhash, &streql);
  return true;
}
  288. bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
  289. upb_value *v) {
  290. uint32_t hash = upb_murmur_hash2(key, len, 0);
  291. return lookup(&t->t, strkey2(key, len), v, hash, &streql);
  292. }
/* Removes the entry for key. On success optionally stores the old value in
 * *val, frees the copied key via alloc (callers pass NULL alloc for arenas),
 * and returns true. */
bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len,
                          upb_value *val, upb_alloc *alloc) {
  uint32_t hash = upb_murmur_hash2(key, len, 0);
  upb_tabkey tabkey;
  if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) {
    if (alloc) {
      /* Arena-based allocs don't need to free and won't pass this. */
      upb_free(alloc, (void*)tabkey);
    }
    return true;
  } else {
    return false;
  }
}
/* Iteration */

/* Positions i at the first occupied entry (immediately done if empty). */
void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) {
  i->t = t;
  i->index = begin(&t->t);
}

/* Advances to the next occupied entry; index becomes SIZE_MAX at the end. */
void upb_strtable_next(upb_strtable_iter *i) {
  i->index = next(&i->t->t, i->index);
}

/* True once iteration is exhausted or the iterator was explicitly set done. */
bool upb_strtable_done(const upb_strtable_iter *i) {
  if (!i->t) return true;
  return i->index >= upb_table_size(&i->t->t) ||
         upb_tabent_isempty(str_tabent(i));
}

/* Key of the current entry as a upb_strview into the stored copy. */
upb_strview upb_strtable_iter_key(const upb_strtable_iter *i) {
  upb_strview key;
  uint32_t len;
  UPB_ASSERT(!upb_strtable_done(i));
  key.data = upb_tabstr(str_tabent(i)->key, &len);
  key.size = len;
  return key;
}

/* Value of the current entry. */
upb_value upb_strtable_iter_value(const upb_strtable_iter *i) {
  UPB_ASSERT(!upb_strtable_done(i));
  return _upb_value_val(str_tabent(i)->val.val);
}

/* Marks the iterator done without iterating. */
void upb_strtable_iter_setdone(upb_strtable_iter *i) {
  i->t = NULL;
  i->index = SIZE_MAX;
}

/* Iterators are equal when both are done, or when they reference the same
 * table and position. */
bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
                               const upb_strtable_iter *i2) {
  if (upb_strtable_done(i1) && upb_strtable_done(i2))
    return true;
  return i1->t == i2->t && i1->index == i2->index;
}
/* upb_inttable ***************************************************************/

/* For inttables we use a hybrid structure where small keys are kept in an
 * array and large keys are put in the hash table. */

/* Hash for integer keys. */
static uint32_t inthash(upb_tabkey key) { return upb_inthash(key); }

/* Equality for integer keys. */
static bool inteql(upb_tabkey k1, lookupkey_t k2) {
  return k1 == k2.num;
}

/* Casts away const on the array part. */
static upb_tabval *mutable_array(upb_inttable *t) {
  return (upb_tabval*)t->array;
}

/* Returns a pointer to the value slot for key, or NULL if not present.
 * Keys below array_size live in the array part; larger keys in the hash
 * part. */
static upb_tabval *inttable_val(upb_inttable *t, uintptr_t key) {
  if (key < t->array_size) {
    return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL;
  } else {
    upb_tabent *e =
        findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql);
    return e ? &e->val : NULL;
  }
}

/* Const wrapper around inttable_val(). */
static const upb_tabval *inttable_val_const(const upb_inttable *t,
                                            uintptr_t key) {
  return inttable_val((upb_inttable*)t, key);
}
  365. size_t upb_inttable_count(const upb_inttable *t) {
  366. return t->t.count + t->array_count;
  367. }
/* Debug-only consistency check: every iterated key must be lookup-able and
 * the iterated count must equal upb_inttable_count(). No-op unless
 * UPB_DEBUG_TABLE is defined in a non-NDEBUG build. */
static void check(upb_inttable *t) {
  UPB_UNUSED(t);
#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG)
  {
    /* This check is very expensive (makes inserts/deletes O(N)). */
    size_t count = 0;
    upb_inttable_iter i;
    upb_inttable_begin(&i, t);
    for(; !upb_inttable_done(&i); upb_inttable_next(&i), count++) {
      UPB_ASSERT(upb_inttable_lookup(t, upb_inttable_iter_key(&i), NULL));
    }
    UPB_ASSERT(count == upb_inttable_count(t));
  }
#endif
}
  383. bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2,
  384. upb_alloc *a) {
  385. size_t array_bytes;
  386. if (!init(&t->t, hsize_lg2, a)) return false;
  387. /* Always make the array part at least 1 long, so that we know key 0
  388. * won't be in the hash part, which simplifies things. */
  389. t->array_size = UPB_MAX(1, asize);
  390. t->array_count = 0;
  391. array_bytes = t->array_size * sizeof(upb_value);
  392. t->array = upb_malloc(a, array_bytes);
  393. if (!t->array) {
  394. uninit(&t->t, a);
  395. return false;
  396. }
  397. memset(mutable_array(t), 0xff, array_bytes);
  398. check(t);
  399. return true;
  400. }
  401. bool upb_inttable_init2(upb_inttable *t, upb_ctype_t ctype, upb_alloc *a) {
  402. return upb_inttable_sizedinit(t, 0, 4, a);
  403. }
  404. void upb_inttable_uninit2(upb_inttable *t, upb_alloc *a) {
  405. uninit(&t->t, a);
  406. upb_free(a, mutable_array(t));
  407. }
/* Inserts (key, val); the key must not already be present. Small keys go
 * into the array part; larger keys go into the hash part, which is doubled
 * and rehashed first when at capacity. Returns false only on OOM. */
bool upb_inttable_insert2(upb_inttable *t, uintptr_t key, upb_value val,
                          upb_alloc *a) {
  upb_tabval tabval;
  tabval.val = val.val;
  UPB_ASSERT(upb_arrhas(tabval)); /* This will reject (uint64_t)-1.  Fix this. */

  if (key < t->array_size) {
    /* Array part: the slot must currently be empty. */
    UPB_ASSERT(!upb_arrhas(t->array[key]));
    t->array_count++;
    mutable_array(t)[key].val = val.val;
  } else {
    if (isfull(&t->t)) {
      /* Need to resize the hash part, but we re-use the array part. */
      size_t i;
      upb_table new_table;

      if (!init(&new_table, t->t.size_lg2 + 1, a)) {
        return false;
      }

      /* Rehash every existing hash-part entry into the new table. */
      for (i = begin(&t->t); i < upb_table_size(&t->t); i = next(&t->t, i)) {
        const upb_tabent *e = &t->t.entries[i];
        uint32_t hash;
        upb_value v;

        _upb_value_setval(&v, e->val.val);
        hash = upb_inthash(e->key);
        insert(&new_table, intkey(e->key), e->key, v, hash, &inthash, &inteql);
      }

      UPB_ASSERT(t->t.count == new_table.count);

      uninit(&t->t, a);
      t->t = new_table;
    }
    insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql);
  }
  check(t);
  return true;
}
  442. bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v) {
  443. const upb_tabval *table_v = inttable_val_const(t, key);
  444. if (!table_v) return false;
  445. if (v) _upb_value_setval(v, table_v->val);
  446. return true;
  447. }
  448. bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val) {
  449. upb_tabval *table_v = inttable_val(t, key);
  450. if (!table_v) return false;
  451. table_v->val = val.val;
  452. return true;
  453. }
/* Removes key if present, optionally storing its old value into *val.
 * Returns true iff the key was found and removed. */
bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) {
  bool success;
  if (key < t->array_size) {
    if (upb_arrhas(t->array[key])) {
      /* Array part: copy the value out, then mark the slot empty again. */
      upb_tabval empty = UPB_TABVALUE_EMPTY_INIT;
      t->array_count--;
      if (val) {
        _upb_value_setval(val, t->array[key].val);
      }
      mutable_array(t)[key] = empty;
      success = true;
    } else {
      success = false;
    }
  } else {
    /* Hash part: the stored key needs no separate freeing (removed = NULL). */
    success = rm(&t->t, intkey(key), val, NULL, upb_inthash(key), &inteql);
  }
  check(t);
  return success;
}
/* Appends val at key == current count, giving stack/array-like semantics. */
bool upb_inttable_push2(upb_inttable *t, upb_value val, upb_alloc *a) {
  return upb_inttable_insert2(t, upb_inttable_count(t), val, a);
}

/* Removes and returns the value at the top "stack" key (count - 1). Debug
 * builds assert the element exists; popping an empty table is invalid. */
upb_value upb_inttable_pop(upb_inttable *t) {
  upb_value val;
  bool ok = upb_inttable_remove(t, upb_inttable_count(t) - 1, &val);
  UPB_ASSERT(ok);
  return val;
}

/* Pointer-keyed convenience wrappers: the pointer's bit pattern is the key. */
bool upb_inttable_insertptr2(upb_inttable *t, const void *key, upb_value val,
                             upb_alloc *a) {
  return upb_inttable_insert2(t, (uintptr_t)key, val, a);
}

bool upb_inttable_lookupptr(const upb_inttable *t, const void *key,
                            upb_value *v) {
  return upb_inttable_lookup(t, (uintptr_t)key, v);
}

bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) {
  return upb_inttable_remove(t, (uintptr_t)key, val);
}
/* Rebuilds the table with an optimally-sized array part: the largest
 * power-of-two key range that still satisfies MIN_DENSITY, with the
 * remaining keys moved to a right-sized hash part. */
void upb_inttable_compact2(upb_inttable *t, upb_alloc *a) {
  /* A power-of-two histogram of the table keys. */
  size_t counts[UPB_MAXARRSIZE + 1] = {0};

  /* The max key in each bucket. */
  uintptr_t max[UPB_MAXARRSIZE + 1] = {0};

  upb_inttable_iter i;
  size_t arr_count;
  int size_lg2;
  upb_inttable new_t;

  upb_inttable_begin(&i, t);
  for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
    uintptr_t key = upb_inttable_iter_key(&i);
    int bucket = log2ceil(key);
    max[bucket] = UPB_MAX(max[bucket], key);
    counts[bucket]++;
  }

  /* Find the largest power of two that satisfies the MIN_DENSITY
   * definition (while actually having some keys). */
  arr_count = upb_inttable_count(t);

  for (size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 0; size_lg2--) {
    if (counts[size_lg2] == 0) {
      /* We can halve again without losing any entries. */
      continue;
    } else if (arr_count >= (1 << size_lg2) * MIN_DENSITY) {
      break;
    }

    /* Keys in this bucket will go to the hash part instead. */
    arr_count -= counts[size_lg2];
  }

  UPB_ASSERT(arr_count <= upb_inttable_count(t));

  {
    /* Insert all elements into new, perfectly-sized table. */
    size_t arr_size = max[size_lg2] + 1;  /* +1 so arr[max] will fit. */
    size_t hash_count = upb_inttable_count(t) - arr_count;
    size_t hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0;
    int hashsize_lg2 = log2ceil(hash_size);

    /* NOTE(review): the return values of upb_inttable_sizedinit() and
     * upb_inttable_insert2() are ignored; on OOM the result is undefined. */
    upb_inttable_sizedinit(&new_t, arr_size, hashsize_lg2, a);
    upb_inttable_begin(&i, t);
    for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
      uintptr_t k = upb_inttable_iter_key(&i);
      upb_inttable_insert2(&new_t, k, upb_inttable_iter_value(&i), a);
    }
    UPB_ASSERT(new_t.array_size == arr_size);
    UPB_ASSERT(new_t.t.size_lg2 == hashsize_lg2);
  }

  upb_inttable_uninit2(t, a);
  *t = new_t;
}
/* Iteration. */

/* Current hash-part entry; only valid while iterating the hash part. */
static const upb_tabent *int_tabent(const upb_inttable_iter *i) {
  UPB_ASSERT(!i->array_part);
  return &i->t->t.entries[i->index];
}

/* Current array-part value; only valid while iterating the array part. */
static upb_tabval int_arrent(const upb_inttable_iter *i) {
  UPB_ASSERT(i->array_part);
  return i->t->array[i->index];
}

/* Starts iteration in the array part at index -1 and immediately advances
 * to the first occupied slot (possibly falling through to the hash part). */
void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t) {
  i->t = t;
  i->index = -1;
  i->array_part = true;
  upb_inttable_next(i);
}
/* Advances the iterator: scans the array part first, then switches to the
 * hash part once the array is exhausted. */
void upb_inttable_next(upb_inttable_iter *iter) {
  const upb_inttable *t = iter->t;
  if (iter->array_part) {
    while (++iter->index < t->array_size) {
      if (upb_arrhas(int_arrent(iter))) {
        return;
      }
    }
    /* Array part exhausted; fall through to the hash part. */
    iter->array_part = false;
    iter->index = begin(&t->t);
  } else {
    iter->index = next(&t->t, iter->index);
  }
}
/* True once both the array part and the hash part are exhausted (or the
 * iterator was explicitly set done). */
bool upb_inttable_done(const upb_inttable_iter *i) {
  if (!i->t) return true;
  if (i->array_part) {
    return i->index >= i->t->array_size ||
           !upb_arrhas(int_arrent(i));
  } else {
    return i->index >= upb_table_size(&i->t->t) ||
           upb_tabent_isempty(int_tabent(i));
  }
}

/* Key of the current element (in the array part, the index is the key). */
uintptr_t upb_inttable_iter_key(const upb_inttable_iter *i) {
  UPB_ASSERT(!upb_inttable_done(i));
  return i->array_part ? i->index : int_tabent(i)->key;
}

/* Value of the current element. */
upb_value upb_inttable_iter_value(const upb_inttable_iter *i) {
  UPB_ASSERT(!upb_inttable_done(i));
  return _upb_value_val(
      i->array_part ? i->t->array[i->index].val : int_tabent(i)->val.val);
}

/* Marks the iterator done without iterating. */
void upb_inttable_iter_setdone(upb_inttable_iter *i) {
  i->t = NULL;
  i->index = SIZE_MAX;
  i->array_part = false;
}

/* Iterators are equal when both are done, or when table, position, and part
 * all match. */
bool upb_inttable_iter_isequal(const upb_inttable_iter *i1,
                               const upb_inttable_iter *i2) {
  if (upb_inttable_done(i1) && upb_inttable_done(i2))
    return true;
  return i1->t == i2->t && i1->index == i2->index &&
         i1->array_part == i2->array_part;
}
  601. #if defined(UPB_UNALIGNED_READS_OK) || defined(__s390x__)
  602. /* -----------------------------------------------------------------------------
  603. * MurmurHash2, by Austin Appleby (released as public domain).
  604. * Reformatted and C99-ified by Joshua Haberman.
  605. * Note - This code makes a few assumptions about how your machine behaves -
  606. * 1. We can read a 4-byte value from any address without crashing
  607. * 2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t
  608. * And it has a few limitations -
  609. * 1. It will not work incrementally.
  610. * 2. It will not produce the same results on little-endian and big-endian
  611. * machines. */
  612. uint32_t upb_murmur_hash2(const void *key, size_t len, uint32_t seed) {
  613. /* 'm' and 'r' are mixing constants generated offline.
  614. * They're not really 'magic', they just happen to work well. */
  615. const uint32_t m = 0x5bd1e995;
  616. const int32_t r = 24;
  617. /* Initialize the hash to a 'random' value */
  618. uint32_t h = seed ^ len;
  619. /* Mix 4 bytes at a time into the hash */
  620. const uint8_t * data = (const uint8_t *)key;
  621. while(len >= 4) {
  622. uint32_t k;
  623. memcpy(&k, data, sizeof(k));
  624. k *= m;
  625. k ^= k >> r;
  626. k *= m;
  627. h *= m;
  628. h ^= k;
  629. data += 4;
  630. len -= 4;
  631. }
  632. /* Handle the last few bytes of the input array */
  633. switch(len) {
  634. case 3: h ^= data[2] << 16;
  635. case 2: h ^= data[1] << 8;
  636. case 1: h ^= data[0]; h *= m;
  637. };
  638. /* Do a few final mixes of the hash to ensure the last few
  639. * bytes are well-incorporated. */
  640. h ^= h >> 13;
  641. h *= m;
  642. h ^= h >> 15;
  643. return h;
  644. }
  645. #else /* !UPB_UNALIGNED_READS_OK */
  646. /* -----------------------------------------------------------------------------
  647. * MurmurHashAligned2, by Austin Appleby
  648. * Same algorithm as MurmurHash2, but only does aligned reads - should be safer
  649. * on certain platforms.
  650. * Performance will be lower than MurmurHash2 */
/* One MurmurHash2 mixing round (uses 'r' from the enclosing scope). */
#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }

/* MurmurHashAligned2: same result as MurmurHash2 but performs only aligned
 * 32-bit loads, shifting misaligned input through temp registers. */
uint32_t upb_murmur_hash2(const void * key, size_t len, uint32_t seed) {
  const uint32_t m = 0x5bd1e995;
  const int32_t r = 24;
  const uint8_t * data = (const uint8_t *)key;
  uint32_t h = (uint32_t)(seed ^ len);
  uint8_t align = (uintptr_t)data & 3;

  if(align && (len >= 4)) {
    /* Pre-load the temp registers */
    uint32_t t = 0, d = 0;
    int32_t sl;
    int32_t sr;

    /* Gather the leading misaligned bytes (cases fall through). */
    switch(align) {
      case 1: t |= data[2] << 16; /* fallthrough */
      case 2: t |= data[1] << 8;  /* fallthrough */
      case 3: t |= data[0];
    }

    t <<= (8 * align);

    /* Advance to the next 4-byte boundary. */
    data += 4-align;
    len -= 4-align;

    sl = 8 * (4-align);
    sr = 8 * align;

    /* Mix: data is now 4-byte aligned, so the direct word loads are safe. */
    while(len >= 4) {
      uint32_t k;
      d = *(uint32_t *)data;
      t = (t >> sr) | (d << sl);

      k = t;

      MIX(h,k,m);

      t = d;

      data += 4;
      len -= 4;
    }

    /* Handle leftover data in temp registers */

    d = 0;

    if(len >= align) {
      uint32_t k;

      switch(align) {
        case 3: d |= data[2] << 16; /* fallthrough */
        case 2: d |= data[1] << 8;  /* fallthrough */
        case 1: d |= data[0];
      }

      k = (t >> sr) | (d << sl);
      MIX(h,k,m);

      data += align;
      len -= align;

      /* ----------
       * Handle tail bytes */

      switch(len) {
        case 3: h ^= data[2] << 16; /* fallthrough */
        case 2: h ^= data[1] << 8;  /* fallthrough */
        case 1: h ^= data[0]; h *= m;
      };
    } else {
      switch(len) {
        case 3: d |= data[2] << 16; /* fallthrough */
        case 2: d |= data[1] << 8;  /* fallthrough */
        case 1: d |= data[0];       /* fallthrough */
        case 0: h ^= (t >> sr) | (d << sl); h *= m;
      }
    }

    /* Final avalanche. */
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
  } else {
    /* Input already aligned (or too short to matter): plain Murmur2 body. */
    while(len >= 4) {
      uint32_t k = *(uint32_t *)data;

      MIX(h,k,m);

      data += 4;
      len -= 4;
    }

    /* ----------
     * Handle tail bytes */

    switch(len) {
      case 3: h ^= data[2] << 16; /* fallthrough */
      case 2: h ^= data[1] << 8;  /* fallthrough */
      case 1: h ^= data[0]; h *= m;
    };

    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
  }
}
#undef MIX
  737. #endif /* UPB_UNALIGNED_READS_OK */