| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367 | 
							- /*
 
- ------------------------------------------------------------------------------
 
- perfect.c: code to generate code for a hash for perfect hashing.
 
- (c) Bob Jenkins, September 1996, December 1999
 
- You may use this code in any way you wish, and it is free.  No warranty.
 
- I hereby place this in the public domain.
 
- Source is http://burtleburtle.net/bob/c/perfect.c
 
- This generates a minimal perfect hash function.  That means, given a
 
- set of n keys, this determines a hash function that maps each of
 
- those keys into a value in 0..n-1 with no collisions.
 
- The perfect hash function first uses a normal hash function on the key
 
- to determine (a,b) such that the pair (a,b) is distinct for all
 
- keys, then it computes a^scramble[tab[b]] to get the final perfect hash.
 
- tab[] is an array of 1-byte values and scramble[] is a 256-term array of 
 
- 2-byte or 4-byte values.  If there are n keys, the length of tab[] is a 
 
- power of two between n/3 and n.
 
- I found the idea of computing distinct (a,b) values in "Practical minimal 
 
- perfect hash functions for large databases", Fox, Heath, Chen, and Daoud, 
 
- Communications of the ACM, January 1992.  They found the idea in Chichelli 
 
- (CACM Jan 1980).  Beyond that, our methods differ.
 
- The key is hashed to a pair (a,b) where a in 0..*alen*-1 and b in
 
- 0..*blen*-1.  A fast hash function determines both a and b
 
- simultaneously.  Any decent hash function is likely to produce
 
- hashes so that (a,b) is distinct for all pairs.  I try the hash
 
- using different values of *salt* until all pairs are distinct.
 
- The final hash is (a XOR scramble[tab[b]]).  *scramble* is a
 
- predetermined mapping of 0..255 into 0..smax-1.  *tab* is an
 
- array that we fill in in such a way as to make the hash perfect.
 
- First we fill in all values of *tab* that are used by more than one
 
- key.  We try all possible values for each position until one works.
 
- This leaves m unmapped keys and m values that something could hash to.
 
- If you treat unmapped keys as lefthand nodes and unused hash values
 
- as righthand nodes, and draw a line connecting each key to each hash
 
- value it could map to, you get a bipartite graph.  We attempt to
 
- find a perfect matching in this graph.  If we succeed, we have
 
- determined a perfect hash for the whole set of keys.
 
- *scramble* is used because (a^tab[i]) clusters keys around *a*.
 
- ------------------------------------------------------------------------------
 
- */
 
- #ifndef STANDARD
 
- #include "standard.h"
 
- #endif
 
- #ifndef LOOKUPA
 
- #include "lookupa.h"
 
- #endif
 
- #ifndef RECYCLE
 
- #include "recycle.h"
 
- #endif
 
- #ifndef PERFECT
 
- #include "perfect.h"
 
- #endif
 
- /*
 
- ------------------------------------------------------------------------------
 
- Find the mapping that will produce a perfect hash
 
- ------------------------------------------------------------------------------
 
- */
 
- /* return the ceiling of the log (base 2) of val */
 
- ub4  mylog2(val)
 
- ub4  val;
 
- {
 
-   ub4 i;
 
-   for (i=0; ((ub4)1<<i) < val; ++i)
 
-     ;
 
-   return i;
 
- }
 
- /* compute p(x), where p is a permutation of 0..(1<<nbits)-1 */
 
- /* permute(0)=0.  This is intended and useful. */
 
- static ub4  permute(x, nbits)
 
- ub4 x;                                       /* input, a value in some range */
 
- ub4 nbits;                                 /* input, number of bits in range */
 
- {
 
-   int i;
 
-   int mask   = ((ub4)1<<nbits)-1;                                /* all ones */
 
-   int const2 = 1+nbits/2;
 
-   int const3 = 1+nbits/3;
 
-   int const4 = 1+nbits/4;
 
-   int const5 = 1+nbits/5;
 
-   for (i=0; i<20; ++i)
 
-   {
 
-     x = (x+(x<<const2)) & mask; 
 
-     x = (x^(x>>const3));
 
-     x = (x+(x<<const4)) & mask;
 
-     x = (x^(x>>const5));
 
-   }
 
-   return x;
 
- }
 
- /* initialize scramble[] with distinct random values in 0..smax-1 */
 
- static void scrambleinit(scramble, smax)
 
- ub4      *scramble;                            /* hash is a^scramble[tab[b]] */
 
- ub4       smax;                    /* scramble values should be in 0..smax-1 */
 
- {
 
-   ub4 i;
 
-   /* fill scramble[] with distinct random integers in 0..smax-1 */
 
-   for (i=0; i<SCRAMBLE_LEN; ++i)
 
-   {
 
-     scramble[i] = permute(i, mylog2(smax));
 
-   }
 
- }
 
- /* 
 
-  * Check if key1 and key2 are the same. 
 
-  * We already checked (a,b) are the same.
 
-  */
 
- static void checkdup(key1, key2, form)
 
- key      *key1;
 
- key      *key2;
 
- hashform *form;
 
- {
 
-   switch(form->hashtype)
 
-   {
 
-   case STRING_HT:
 
-     if ((key1->len_k == key2->len_k) &&
 
- 	!memcmp(key1->name_k, key2->name_k, (size_t)key1->len_k))
 
-     {
 
-       fprintf(stderr, "perfect.c: Duplicates keys!  %.*s\n",
 
- 	      key1->len_k, key1->name_k);
 
-       exit(SUCCESS);
 
-     }
 
-     break;
 
-   case INT_HT:
 
-     if (key1->hash_k == key2->hash_k)
 
-     {
 
-       fprintf(stderr, "perfect.c: Duplicate keys!  %.8lx\n", key1->hash_k);
 
-       exit(SUCCESS);
 
-     }
 
-     break;
 
-   case AB_HT:
 
-     fprintf(stderr, "perfect.c: Duplicate keys!  %.8lx %.8lx\n",
 
- 	    key1->a_k, key1->b_k);
 
-     exit(SUCCESS);
 
-     break;
 
-   default:
 
-     fprintf(stderr, "perfect.c: Illegal hash type %ld\n", (ub4)form->hashtype);
 
-     exit(SUCCESS);
 
-     break;
 
-   }
 
- }
 
- /* 
 
-  * put keys in tabb according to key->b_k
 
-  * check if the initial hash might work 
 
-  */
 
- static int inittab(tabb, blen, keys, form, complete)
 
- bstuff   *tabb;                     /* output, list of keys with b for (a,b) */
 
- ub4       blen;                                            /* length of tabb */
 
- key      *keys;                               /* list of keys already hashed */
 
- hashform *form;                                           /* user directives */
 
- int       complete;        /* TRUE means to complete init despite collisions */
 
- {
 
-   int  nocollision = TRUE;
 
-   key *mykey;
 
-   memset((void *)tabb, 0, (size_t)(sizeof(bstuff)*blen));
 
-   /* Two keys with the same (a,b) guarantees a collision */
 
-   for (mykey=keys; mykey; mykey=mykey->next_k)
 
-   {
 
-     key *otherkey;
 
-     for (otherkey=tabb[mykey->b_k].list_b; 
 
- 	 otherkey; 
 
- 	 otherkey=otherkey->nextb_k)
 
-     {
 
-       if (mykey->a_k == otherkey->a_k)
 
-       {
 
-         nocollision = FALSE;
 
- 	checkdup(mykey, otherkey, form);
 
- 	if (!complete)
 
- 	  return FALSE;
 
-       }
 
-     }
 
-     ++tabb[mykey->b_k].listlen_b;
 
-     mykey->nextb_k = tabb[mykey->b_k].list_b;
 
-     tabb[mykey->b_k].list_b = mykey;
 
-   }
 
-   /* no two keys have the same (a,b) pair */
 
-   return nocollision;
 
- }
 
- /* Do the initial hash for normal mode (use lookup and checksum) */
 
- static void initnorm(keys, alen, blen, smax, salt, final)
 
- key      *keys;                                          /* list of all keys */
 
- ub4       alen;                    /* (a,b) has a in 0..alen-1, a power of 2 */
 
- ub4       blen;                    /* (a,b) has b in 0..blen-1, a power of 2 */
 
- ub4       smax;                   /* maximum range of computable hash values */
 
- ub4       salt;                     /* used to initialize the hash function */
 
- gencode  *final;                          /* output, code for the final hash */
 
- {
 
-   key *mykey;
 
-   if (mylog2(alen)+mylog2(blen) > UB4BITS)
 
-   {
 
-     ub4 initlev = salt*0x9e3779b9;  /* the golden ratio; an arbitrary value */
 
-     for (mykey=keys; mykey; mykey=mykey->next_k)
 
-     {
 
-       ub4 i, state[CHECKSTATE];
 
-       for (i=0; i<CHECKSTATE; ++i) state[i] = initlev;
 
-       checksum( mykey->name_k, mykey->len_k, state);
 
-       mykey->a_k = state[0]&(alen-1);
 
-       mykey->b_k = state[1]&(blen-1);
 
-     }
 
-     final->used = 4;
 
-     sprintf(final->line[0], 
 
- 	    "  ub4 i,state[CHECKSTATE],rsl;\n");
 
-     sprintf(final->line[1], 
 
- 	    "  for (i=0; i<CHECKSTATE; ++i) state[i]=0x%lx;\n",initlev);
 
-     sprintf(final->line[2],
 
- 	    "  checksum(key, len, state);\n");
 
-     sprintf(final->line[3], 
 
- 	    "  rsl = ((state[0]&0x%x)^scramble[tab[state[1]&0x%x]]);\n",
 
- 	    alen-1, blen-1);
 
-   }
 
-   else
 
-   {
 
-     ub4 loga = mylog2(alen);                            /* log based 2 of blen */
 
-     ub4 initlev = salt*0x9e3779b9;  /* the golden ratio; an arbitrary value */
 
-     for (mykey=keys; mykey; mykey=mykey->next_k)
 
-     {
 
-       ub4 hash = lookup(mykey->name_k, mykey->len_k, initlev);
 
-       mykey->a_k = (loga > 0) ? hash>>(UB4BITS-loga) : 0;
 
-       mykey->b_k = (blen > 1) ? hash&(blen-1) : 0;
 
-     }
 
-     final->used = 2;
 
-     sprintf(final->line[0], 
 
- 	    "  ub4 rsl, val = lookup(key, len, 0x%lx);\n", initlev);
 
-     if (smax <= 1)
 
-     {
 
-       sprintf(final->line[1], "  rsl = 0;\n");
 
-     }
 
-     else if (mylog2(alen) == 0)
 
-     {
 
-       sprintf(final->line[1], "  rsl = tab[val&0x%x];\n", blen-1);
 
-     }
 
-     else if (blen < USE_SCRAMBLE)
 
-     {
 
-       sprintf(final->line[1], "  rsl = ((val>>%ld)^tab[val&0x%x]);\n",
 
- 	      UB4BITS-mylog2(alen), blen-1);
 
-     }
 
-     else
 
-     {
 
-       sprintf(final->line[1], "  rsl = ((val>>%ld)^scramble[tab[val&0x%x]]);\n",
 
- 	      UB4BITS-mylog2(alen), blen-1);
 
-     }
 
-   }
 
- }
 
- /* Do initial hash for inline mode */
 
- static void initinl(keys, alen, blen, smax, salt, final)
 
- key      *keys;                                          /* list of all keys */
 
- ub4       alen;                    /* (a,b) has a in 0..alen-1, a power of 2 */
 
- ub4       blen;                    /* (a,b) has b in 0..blen-1, a power of 2 */
 
- ub4       smax;                           /* range of computable hash values */
 
- ub4       salt;                     /* used to initialize the hash function */
 
- gencode  *final;                            /* generated code for final hash */
 
- {
 
-   key *mykey;
 
-   ub4  amask = alen-1;
 
-   ub4  blog  = mylog2(blen);
 
-   ub4  initval = salt*0x9e3779b9;    /* the golden ratio; an arbitrary value */
 
-   /* It's more important to have b uniform than a, so b is the low bits */
 
-   for (mykey = keys;  mykey != (key *)0;  mykey = mykey->next_k)
 
-   {
 
-     ub4   hash = initval;
 
-     ub4   i;
 
-     for (i=0; i<mykey->len_k; ++i)
 
-     {
 
-       hash = (mykey->name_k[i] ^ hash) + ((hash<<(UB4BITS-6))+(hash>>6));
 
-     }
 
-     mykey->hash_k = hash;
 
-     mykey->a_k = (alen > 1) ? (hash & amask) : 0;
 
-     mykey->b_k = (blen > 1) ? (hash >> (UB4BITS-blog)) : 0;
 
-   }
 
-   final->used = 1;
 
-   if (smax <= 1)
 
-   {
 
-     sprintf(final->line[0], "  ub4 rsl = 0;\n");
 
-   }
 
-   else if (blen < USE_SCRAMBLE)
 
-   {
 
-     sprintf(final->line[0], "  ub4 rsl = ((val & 0x%lx) ^ tab[val >> %ld]);\n",
 
- 	    amask, UB4BITS-blog);
 
-   }
 
-   else
 
-   {
 
-     sprintf(final->line[0], "  ub4 rsl = ((val & 0x%lx) ^ scramble[tab[val >> %ld]]);\n",
 
- 	    amask, UB4BITS-blog);
 
-   }
 
- }
 
- /* 
 
-  * Run a hash function on the key to get a and b 
 
-  * Returns:
 
-  *   0: didn't find distinct (a,b) for all keys
 
-  *   1: found distinct (a,b) for all keys, put keys in tabb[]
 
-  *   2: found a perfect hash, no need to do any more work
 
-  */
 
- static ub4 initkey(keys, nkeys, tabb, alen, blen, smax, salt, form, final)
 
- key      *keys;                                          /* list of all keys */
 
- ub4       nkeys;                                     /* total number of keys */
 
- bstuff   *tabb;                                        /* stuff indexed by b */
 
- ub4       alen;                    /* (a,b) has a in 0..alen-1, a power of 2 */
 
- ub4       blen;                    /* (a,b) has b in 0..blen-1, a power of 2 */
 
- ub4       smax;                           /* range of computable hash values */
 
- ub4       salt;                     /* used to initialize the hash function */
 
- hashform *form;                                           /* user directives */
 
- gencode  *final;                                      /* code for final hash */
 
- {
 
-   ub4 finished;
 
-   /* Do the initial hash of the keys */
 
-   switch(form->mode)
 
-   {
 
-   case NORMAL_HM:
 
-     initnorm(keys, alen, blen, smax, salt, final);
 
-     break;
 
-   case INLINE_HM:
 
-     initinl(keys, alen, blen, smax, salt, final);
 
-     break;
 
-   case HEX_HM:
 
-   case DECIMAL_HM:
 
-     finished = inithex(keys, nkeys, alen, blen, smax, salt, final, form); 
 
-     if (finished) return 2;
 
-     break;
 
-   default:
 
-     fprintf(stderr, "fatal error: illegal mode\n"); 
 
-     exit(1);
 
-   }
 
-   if (nkeys <= 1)
 
-   {
 
-     final->used = 1;
 
-     sprintf(final->line[0], "  ub4 rsl = 0;\n");
 
-     return 2;
 
-   }
 
-   return inittab(tabb, blen, keys, form, FALSE);
 
- }
 
- /* Print an error message and exit if there are duplicates */
 
- static void duplicates(tabb, blen, keys, form)
 
- bstuff   *tabb;                    /* array of lists of keys with the same b */
 
- ub4       blen;                              /* length of tabb, a power of 2 */
 
- key      *keys;
 
- hashform *form;                                           /* user directives */
 
- {
 
-   ub4  i;
 
-   key *key1;
 
-   key *key2;
 
-   (void)inittab(tabb, blen, keys, form, TRUE);
 
-   /* for each b, do nested loops through key list looking for duplicates */
 
-   for (i=0; i<blen; ++i)
 
-     for (key1=tabb[i].list_b; key1; key1=key1->nextb_k)
 
-       for (key2=key1->nextb_k; key2; key2=key2->nextb_k)
 
- 	checkdup(key1, key2, form);
 
- }
 
- /* Try to apply an augmenting list */
 
- static int apply(tabb, tabh, tabq, blen, scramble, tail, rollback)
 
- bstuff *tabb;
 
- hstuff *tabh;
 
- qstuff *tabq;
 
- ub4     blen;
 
- ub4    *scramble;
 
- ub4     tail;
 
- int     rollback;          /* FALSE applies augmenting path, TRUE rolls back */
 
- {
 
-   ub4     hash;
 
-   key    *mykey;
 
-   bstuff *pb;
 
-   ub4     child;
 
-   ub4     parent;
 
-   ub4     stabb;                                         /* scramble[tab[b]] */
 
-   /* walk from child to parent */
 
-   for (child=tail-1; child; child=parent)
 
-   {
 
-     parent = tabq[child].parent_q;                    /* find child's parent */
 
-     pb     = tabq[parent].b_q;             /* find parent's list of siblings */
 
-     /* erase old hash values */
 
-     stabb = scramble[pb->val_b];
 
-     for (mykey=pb->list_b; mykey; mykey=mykey->nextb_k)
 
-     {
 
-       hash = mykey->a_k^stabb;
 
-       if (mykey == tabh[hash].key_h)
 
-       {                            /* erase hash for all of child's siblings */
 
- 	tabh[hash].key_h = (key *)0;
 
-       }
 
-     }
 
-     /* change pb->val_b, which will change the hashes of all parent siblings */
 
-     pb->val_b = (rollback ? tabq[child].oldval_q : tabq[child].newval_q);
 
-     /* set new hash values */
 
-     stabb = scramble[pb->val_b];
 
-     for (mykey=pb->list_b; mykey; mykey=mykey->nextb_k)
 
-     {
 
-       hash = mykey->a_k^stabb;
 
-       if (rollback)
 
-       {
 
- 	if (parent == 0) continue;                  /* root never had a hash */
 
-       }
 
-       else if (tabh[hash].key_h)
 
-       {
 
- 	/* very rare: roll back any changes */
 
- 	(void *)apply(tabb, tabh, tabq, blen, scramble, tail, TRUE);
 
- 	return FALSE;                                  /* failure, collision */
 
-       }
 
-       tabh[hash].key_h = mykey;
 
-     }
 
-   }
 
-   return TRUE;
 
- }
 
- /*
 
- -------------------------------------------------------------------------------
 
- augment(): Add item to the mapping.
 
- Construct a spanning tree of *b*s with *item* as root, where each
 
- parent can have all its hashes changed (by some new val_b) with 
 
- at most one collision, and each child is the b of that collision.
 
- I got this from Tarjan's "Data Structures and Network Algorithms".  The
 
- path from *item* to a *b* that can be remapped with no collision is 
 
- an "augmenting path".  Change values of tab[b] along the path so that 
 
- the unmapped key gets mapped and the unused hash value gets used.
 
- Assuming 1 key per b, if m out of n hash values are still unused, 
 
- you should expect the transitive closure to cover n/m nodes before 
 
- an unused node is found.  Sum(i=1..n)(n/i) is about nlogn, so expect
 
- this approach to take about nlogn time to map all single-key b's.
 
- -------------------------------------------------------------------------------
 
- */
 
- static int augment(tabb, tabh, tabq, blen, scramble, smax, item, nkeys, 
 
- 		   highwater, form)
 
- bstuff   *tabb;                                        /* stuff indexed by b */
 
- hstuff   *tabh;  /* which key is associated with which hash, indexed by hash */
 
- qstuff   *tabq;            /* queue of *b* values, this is the spanning tree */
 
- ub4       blen;                                            /* length of tabb */
 
- ub4      *scramble;                      /* final hash is a^scramble[tab[b]] */
 
- ub4       smax;                                 /* highest value in scramble */
 
- bstuff   *item;                           /* &tabb[b] for the b to be mapped */
 
- ub4       nkeys;                         /* final hash must be in 0..nkeys-1 */
 
- ub4       highwater;        /* a value higher than any now in tabb[].water_b */
 
- hashform *form;               /* TRUE if we should do a minimal perfect hash */
 
- {
 
-   ub4  q;                      /* current position walking through the queue */
 
-   ub4  tail;              /* tail of the queue.  0 is the head of the queue. */
 
-   ub4  limit=((blen < USE_SCRAMBLE) ? smax : UB1MAXVAL+1);
 
-   ub4  highhash = ((form->perfect == MINIMAL_HP) ? nkeys : smax);
 
-   int  trans = (form->speed == SLOW_HS || form->perfect == MINIMAL_HP);
 
-   /* initialize the root of the spanning tree */
 
-   tabq[0].b_q = item;
 
-   tail = 1;
 
-   /* construct the spanning tree by walking the queue, add children to tail */
 
-   for (q=0; q<tail; ++q)
 
-   {
 
-     bstuff *myb = tabq[q].b_q;                        /* the b for this node */
 
-     ub4     i;                              /* possible value for myb->val_b */
 
-     if (!trans && (q == 1)) 
 
-       break;                                  /* don't do transitive closure */
 
-     for (i=0; i<limit; ++i)
 
-     {
 
-       bstuff *childb = (bstuff *)0;             /* the b that this i maps to */
 
-       key    *mykey;                       /* for walking through myb's keys */
 
-       for (mykey = myb->list_b; mykey; mykey=mykey->nextb_k)
 
-       {
 
- 	key    *childkey;
 
- 	ub4 hash = mykey->a_k^scramble[i];
 
- 	if (hash >= highhash) break;                        /* out of bounds */
 
- 	childkey = tabh[hash].key_h;
 
- 	if (childkey)
 
- 	{
 
- 	  bstuff *hitb = &tabb[childkey->b_k];
 
- 	  if (childb)
 
- 	  {
 
- 	    if (childb != hitb) break;            /* hit at most one child b */
 
- 	  }
 
- 	  else
 
- 	  {
 
- 	    childb = hitb;                        /* remember this as childb */
 
- 	    if (childb->water_b == highwater) break;     /* already explored */
 
- 	  }
 
- 	}
 
-       }
 
-       if (mykey) continue;             /* myb with i has multiple collisions */
 
-       /* add childb to the queue of reachable things */
 
-       if (childb) childb->water_b = highwater;
 
-       tabq[tail].b_q      = childb;
 
-       tabq[tail].newval_q = i;     /* how to make parent (myb) use this hash */
 
-       tabq[tail].oldval_q = myb->val_b;            /* need this for rollback */
 
-       tabq[tail].parent_q = q;
 
-       ++tail;
 
-       if (!childb)
 
-       {                                  /* found an *i* with no collisions? */
 
- 	/* try to apply the augmenting path */
 
- 	if (apply(tabb, tabh, tabq, blen, scramble, tail, FALSE))
 
- 	  return TRUE;        /* success, item was added to the perfect hash */
 
- 	--tail;                    /* don't know how to handle such a child! */
 
-       }
 
-     }
 
-   }
 
-   return FALSE;
 
- }
 
- /* find a mapping that makes this a perfect hash */
 
- static int perfect(tabb, tabh, tabq, blen, smax, scramble, nkeys, form)
 
- bstuff   *tabb;
 
- hstuff   *tabh;
 
- qstuff   *tabq;
 
- ub4       blen;
 
- ub4       smax;
 
- ub4      *scramble;
 
- ub4       nkeys;
 
- hashform *form;
 
- {
 
-   ub4 maxkeys;                           /* maximum number of keys for any b */
 
-   ub4 i, j;
 
-   /* clear any state from previous attempts */
 
-   memset((void *)tabh, 0, 
 
- 	 (size_t)(sizeof(hstuff)*
 
- 		  ((form->perfect == MINIMAL_HP) ? nkeys : smax)));
 
-   memset((void *)tabq, 0, (size_t)(sizeof(qstuff)*(blen+1)));
 
-   for (maxkeys=0,i=0; i<blen; ++i) 
 
-     if (tabb[i].listlen_b > maxkeys) 
 
-       maxkeys = tabb[i].listlen_b;
 
-   /* In descending order by number of keys, map all *b*s */
 
-   for (j=maxkeys; j>0; --j)
 
-     for (i=0; i<blen; ++i)
 
-       if (tabb[i].listlen_b == j)
 
- 	if (!augment(tabb, tabh, tabq, blen, scramble, smax, &tabb[i], nkeys, 
 
- 		     i+1, form))
 
- 	{
 
- 	  printf("fail to map group of size %ld for tab size %ld\n", j, blen);
 
- 	  return FALSE;
 
- 	}
 
-   /* Success!  We found a perfect hash of all keys into 0..nkeys-1. */
 
-   return TRUE;
 
- }
 
- /*
 
-  * Simple case: user gave (a,b).  No more mixing, no guessing alen or blen. 
 
-  * This assumes a,b reside in (key->a_k, key->b_k), and final->form == AB_HK.
 
-  */
 
- static void hash_ab(tabb, alen, blen, salt, final, 
 
- 	     scramble, smax, keys, nkeys, form)
 
- bstuff  **tabb;           /* output, tab[] of the perfect hash, length *blen */
 
- ub4      *alen;                 /* output, 0..alen-1 is range for a of (a,b) */
 
- ub4      *blen;                 /* output, 0..blen-1 is range for b of (a,b) */
 
- ub4      *salt;                         /* output, initializes initial hash */
 
- gencode  *final;                                      /* code for final hash */
 
- ub4      *scramble;                      /* input, hash = a^scramble[tab[b]] */
 
- ub4      *smax;                           /* input, scramble[i] in 0..smax-1 */
 
- key      *keys;                                       /* input, keys to hash */
 
- ub4       nkeys;                       /* input, number of keys being hashed */
 
- hashform *form;                                           /* user directives */
 
- {
 
-   hstuff *tabh;
 
-   qstuff *tabq;
 
-   key    *mykey;
 
-   ub4     i;
 
-   int     used_tab;
 
-   /* initially make smax the first power of two bigger than nkeys */
 
-   *smax = ((ub4)1<<mylog2(nkeys));
 
-   scrambleinit(scramble, *smax);
 
-   /* set *alen and *blen based on max A and B from user */
 
-   *alen = 1;
 
-   *blen = 1;
 
-   for (mykey = keys;  mykey != (key *)0;  mykey = mykey->next_k)
 
-   {
 
-     while (*alen <= mykey->a_k) *alen *= 2;
 
-     while (*blen <= mykey->b_k) *blen *= 2;
 
-   }
 
-   if (*alen > 2**smax)
 
-   {
 
-     fprintf(stderr,
 
-       "perfect.c: Can't deal with (A,B) having A bigger than twice \n");
 
-     fprintf(stderr,
 
-       "  the smallest power of two greater or equal to any legal hash.\n");
 
-     exit(SUCCESS);
 
-   }
 
-   /* allocate working memory */
 
-   *tabb = (bstuff *)malloc((size_t)(sizeof(bstuff)*(*blen))); 
 
-   tabq  = (qstuff *)remalloc(sizeof(qstuff)*(*blen+1), "perfect.c, tabq");
 
-   tabh  = (hstuff *)remalloc(sizeof(hstuff)*(form->perfect == MINIMAL_HP ? 
 
- 					     nkeys : *smax),
 
- 			     "perfect.c, tabh");
 
-   /* check that (a,b) are distinct and put them in tabb indexed by b */
 
-   (void)inittab(*tabb, *blen, keys, form, FALSE);
 
-   /* try with smax */
 
-   if (!perfect(*tabb, tabh, tabq, *blen, *smax, scramble, nkeys, form))
 
-   {
 
-     if (form->perfect == MINIMAL_HP)
 
-     {
 
-       printf("fatal error: Cannot find perfect hash for user (A,B) pairs\n");
 
-       exit(SUCCESS);
 
-     }
 
-     else
 
-     {
 
-       /* try with 2*smax */
 
-       free((void *)tabh);
 
-       *smax = *smax * 2;
 
-       scrambleinit(scramble, *smax);
 
-       tabh = (hstuff *)remalloc(sizeof(hstuff)*(form->perfect == MINIMAL_HP ?
 
- 						nkeys : *smax),
 
- 				"perfect.c, tabh");
 
-       if (!perfect(*tabb, tabh, tabq, *blen, *smax, scramble, nkeys, form))
 
-       {
 
- 	printf("fatal error: Cannot find perfect hash for user (A,B) pairs\n");
 
- 	exit(SUCCESS);
 
-       }
 
-     }
 
-   }
 
-   /* check if tab[] was really needed */
 
-   for (i=0; i<*blen; ++i)
 
-   {
 
-     if ((*tabb)[i].val_b != 0) break;            /* assumes permute(0) == 0 */
 
-   }
 
-   used_tab = (i < *blen);
 
-   /* write the code for the perfect hash */
 
-   *salt = 1;
 
-   final->used = 1;
 
-   if (!used_tab)
 
-   {
 
-     sprintf(final->line[0], "  ub4 rsl = a;\n");
 
-   }
 
-   else if (*blen < USE_SCRAMBLE)
 
-   {
 
-     sprintf(final->line[0], "  ub4 rsl = (a ^ tab[b]);\n");
 
-   }
 
-   else
 
-   {
 
-     sprintf(final->line[0], "  ub4 rsl = (a ^ scramble[tab[b]]);\n");
 
-   }
 
-   printf("success, found a perfect hash\n");
 
-   free((void *)tabq);
 
-   free((void *)tabh);
 
- }
 
- /* guess initial values for alen and blen */
 
- static void initalen(alen, blen, smax, nkeys, form)
 
- ub4      *alen;                                      /* output, initial alen */
 
- ub4      *blen;                                      /* output, initial blen */
 
- ub4      *smax;    /* input, power of two greater or equal to max hash value */
 
- ub4       nkeys;                              /* number of keys being hashed */
 
- hashform *form;                                           /* user directives */
 
- {
 
-   /*
 
-    * Find initial *alen, *blen
 
-    * Initial alen and blen values were found empirically.  Some factors:
 
-    *
 
-    * If smax<256 there is no scramble, so tab[b] needs to cover 0..smax-1.
 
-    *
 
-    * alen and blen must be powers of 2 because the values in 0..alen-1 and
 
-    * 0..blen-1 are produced by applying a bitmask to the initial hash function.
 
-    *
 
-    * alen must be less than smax, in fact less than nkeys, because otherwise
 
-    * there would often be no i such that a^scramble[i] is in 0..nkeys-1 for
 
-    * all the *a*s associated with a given *b*, so there would be no legal
 
-    * value to assign to tab[b].  This only matters when we're doing a minimal
 
-    * perfect hash.
 
-    *
 
-    * It takes around 800 trials to find distinct (a,b) with nkey=smax*(5/8)
 
-    * and alen*blen = smax*smax/32.
 
-    *
 
-    * Values of blen less than smax/4 never work, and smax/2 always works.
 
-    *
 
-    * We want blen as small as possible because it is the number of bytes in
 
-    * the huge array we must create for the perfect hash.
 
-    *
 
-    * When nkey <= smax*(5/8), blen=smax/4 works much more often with 
 
-    * alen=smax/8 than with alen=smax/4.  Above smax*(5/8), blen=smax/4
 
-    * doesn't seem to care whether alen=smax/8 or alen=smax/4.  I think it
 
-    * has something to do with 5/8 = 1/8 * 5.  For example examine 80000, 
 
-    * 85000, and 90000 keys with different values of alen.  This only matters
 
-    * if we're doing a minimal perfect hash.
 
-    *
 
-    * When alen*blen <= 1<<UB4BITS, the initial hash must produce one integer.
 
-    * Bigger than that it must produce two integers, which increases the
 
-    * cost of the hash per character hashed.
 
-    */
 
-   if (form->perfect == NORMAL_HP)
 
-   {
 
-     if ((form->speed == FAST_HS) && (nkeys > *smax*0.8))
 
-     {
 
-       *smax = *smax * 2;
 
-     }
 
-     *alen = ((form->hashtype==INT_HT) && *smax>131072) ? 
 
-       ((ub4)1<<(UB4BITS-mylog2(*blen))) :   /* distinct keys => distinct (A,B) */
 
-       *smax;                         /* no reason to restrict alen to smax/2 */
 
-     if ((form->hashtype == INT_HT) && *smax < 32)
 
-       *blen = *smax;                      /* go for function speed not space */
 
-     else if (*smax/4 <= (1<<14))
 
-       *blen = ((nkeys <= *smax*0.56) ? *smax/32 :
 
- 	       (nkeys <= *smax*0.74) ? *smax/16 : *smax/8);
 
-     else
 
-       *blen = ((nkeys <= *smax*0.6) ? *smax/16 : 
 
- 	       (nkeys <= *smax*0.8) ? *smax/8 : *smax/4);
 
-     if ((form->speed == FAST_HS) && (*blen < *smax/8))
 
-       *blen = *smax/8;
 
-     if (*alen < 1) *alen = 1;
 
-     if (*blen < 1) *blen = 1;
 
-   }
 
-   else
 
-   {
 
-     switch(mylog2(*smax))
 
-     {
 
-     case 0:
 
-       *alen = 1;
 
-       *blen = 1;
 
-     case 1: case 2: case 3: case 4: case 5: case 6: case 7: case 8:
 
-       *alen = (form->perfect == NORMAL_HP) ? *smax : *smax/2;
 
-       *blen = *smax/2;
 
-       break;
 
-     case 9:
 
-     case 10:
 
-     case 11:
 
-     case 12:
 
-     case 13:
 
-     case 14:
 
-     case 15:
 
-     case 16:
 
-     case 17:
 
-       if (form->speed == FAST_HS)
 
-       {
 
- 	*alen = *smax/2;
 
- 	*blen = *smax/4;
 
-       }
 
-       else if (*smax/4 < USE_SCRAMBLE)
 
-       {
 
- 	*alen = ((nkeys <= *smax*0.52) ? *smax/8 : *smax/4);
 
- 	*blen = ((nkeys <= *smax*0.52) ? *smax/8 : *smax/4);
 
-       }
 
-       else
 
-       {
 
- 	*alen = ((nkeys <= *smax*(5.0/8.0)) ? *smax/8 : 
 
- 		 (nkeys <= *smax*(3.0/4.0)) ? *smax/4 : *smax/2);
 
- 	*blen = *smax/4;                /* always give the small size a shot */
 
-       }
 
-       break;
 
-     case 18:
 
-       if (form->speed == FAST_HS)
 
-       {
 
- 	*alen = *smax/2;
 
- 	*blen = *smax/2;
 
-       }
 
-       else
 
-       {
 
- 	*alen = *smax/8;                 /* never require the multiword hash */
 
- 	*blen = (nkeys <= *smax*(5.0/8.0)) ? *smax/4 : *smax/2;
 
-       }
 
-       break;
 
-     case 19:
 
-     case 20:
 
-       *alen = (nkeys <= *smax*(5.0/8.0)) ? *smax/8 : *smax/2;
 
-       *blen = (nkeys <= *smax*(5.0/8.0)) ? *smax/4 : *smax/2;
 
-       break;
 
-     default:
 
-       *alen = *smax/2;              /* just find a hash as quick as possible */
 
-       *blen = *smax/2;     /* we'll be thrashing virtual memory at this size */
 
-       break;
 
-     }
 
-   }
 
- }
 
- /* 
 
- ** Try to find a perfect hash function.  
 
- ** Return the successful initializer for the initial hash. 
 
- ** Return 0 if no perfect hash could be found.
 
- */
 
- void findhash(tabb, alen, blen, salt, final, 
 
- 	      scramble, smax, keys, nkeys, form)
 
- bstuff  **tabb;           /* output, tab[] of the perfect hash, length *blen */
 
- ub4      *alen;                 /* output, 0..alen-1 is range for a of (a,b) */
 
- ub4      *blen;                 /* output, 0..blen-1 is range for b of (a,b) */
 
- ub4      *salt;                         /* output, initializes initial hash */
 
- gencode  *final;                                      /* code for final hash */
 
- ub4      *scramble;                      /* input, hash = a^scramble[tab[b]] */
 
- ub4      *smax;                           /* input, scramble[i] in 0..smax-1 */
 
- key      *keys;                                       /* input, keys to hash */
 
- ub4       nkeys;                       /* input, number of keys being hashed */
 
- hashform *form;                                           /* user directives */
 
- {
 
-   ub4 bad_initkey;                       /* how many times did initkey fail? */
 
-   ub4 bad_perfect;                       /* how many times did perfect fail? */
 
-   ub4 trysalt;                        /* trial initializer for initial hash */
 
-   ub4 maxalen;
 
-   hstuff *tabh;                       /* table of keys indexed by hash value */
 
-   qstuff *tabq;    /* table of stuff indexed by queue value, used by augment */
 
-   /* The case of (A,B) supplied by the user is a special case */
 
-   if (form->hashtype == AB_HT)
 
-   {
 
-     hash_ab(tabb, alen, blen, salt, final, 
 
- 	    scramble, smax, keys, nkeys, form);
 
-     return;
 
-   }
 
-   /* guess initial values for smax, alen and blen */
 
-   *smax = ((ub4)1<<mylog2(nkeys));
 
-   initalen(alen, blen, smax, nkeys, form);
 
-   scrambleinit(scramble, *smax);
 
-   maxalen = (form->perfect == MINIMAL_HP) ? *smax/2 : *smax;
 
-   /* allocate working memory */
 
-   *tabb = (bstuff *)remalloc((size_t)(sizeof(bstuff)*(*blen)), 
 
- 			     "perfect.c, tabb");
 
-   tabq  = (qstuff *)remalloc(sizeof(qstuff)*(*blen+1), "perfect.c, tabq");
 
-   tabh  = (hstuff *)remalloc(sizeof(hstuff)*(form->perfect == MINIMAL_HP ? 
 
- 					     nkeys : *smax),
 
- 			     "perfect.c, tabh");
 
-   /* Actually find the perfect hash */
 
-   *salt = 0;
 
-   bad_initkey = 0;
 
-   bad_perfect = 0;
 
-   for (trysalt=1; ; ++trysalt)
 
-   {
 
-     ub4 rslinit;
 
-     /* Try to find distinct (A,B) for all keys */
 
-     
 
-     rslinit = initkey(keys, nkeys, *tabb, *alen, *blen, *smax, trysalt,
 
- 		      form, final);
 
-     if (rslinit == 2)
 
-     {      /* initkey actually found a perfect hash, not just distinct (a,b) */
 
-       *salt = 1;
 
-       *blen = 0;
 
-       break;
 
-     }
 
-     else if (rslinit == 0)
 
-     {
 
-       /* didn't find distinct (a,b) */
 
-       if (++bad_initkey >= RETRY_INITKEY)
 
-       {
 
- 	/* Try to put more bits in (A,B) to make distinct (A,B) more likely */
 
- 	if (*alen < maxalen)
 
- 	{
 
- 	  *alen *= 2;
 
- 	} 
 
- 	else if (*blen < *smax)
 
- 	{
 
- 	  *blen *= 2;
 
- 	  free(tabq);
 
- 	  free(*tabb);
 
- 	  *tabb  = (bstuff *)malloc((size_t)(sizeof(bstuff)*(*blen)));
 
- 	  tabq  = (qstuff *)malloc((size_t)(sizeof(qstuff)*(*blen+1)));
 
- 	}
 
- 	else
 
- 	{
 
- 	  duplicates(*tabb, *blen, keys, form);      /* check for duplicates */
 
- 	  printf("fatal error: Cannot perfect hash: cannot find distinct (A,B)\n");
 
- 	  exit(SUCCESS);
 
- 	}
 
- 	bad_initkey = 0;
 
- 	bad_perfect = 0;
 
-       }
 
-       continue;                             /* two keys have same (a,b) pair */
 
-     }
 
-     printf("found distinct (A,B) on attempt %ld\n", trysalt);
 
-     /* Given distinct (A,B) for all keys, build a perfect hash */
 
-     if (!perfect(*tabb, tabh, tabq, *blen, *smax, scramble, nkeys, form))
 
-     {
 
-       if ((form->hashtype != INT_HT && ++bad_perfect >= RETRY_PERFECT) || 
 
- 	  (form->hashtype == INT_HT && ++bad_perfect >= RETRY_HEX))
 
-       {
 
- 	if (*blen < *smax)
 
- 	{
 
- 	  *blen *= 2;
 
- 	  free(*tabb);
 
- 	  free(tabq);
 
- 	  *tabb  = (bstuff *)malloc((size_t)(sizeof(bstuff)*(*blen)));
 
- 	  tabq  = (qstuff *)malloc((size_t)(sizeof(qstuff)*(*blen+1)));
 
- 	  --trysalt;               /* we know this salt got distinct (A,B) */
 
- 	}
 
- 	else
 
- 	{
 
- 	  printf("fatal error: Cannot perfect hash: cannot build tab[]\n");
 
- 	  exit(SUCCESS);
 
- 	}
 
- 	bad_perfect = 0;
 
-       }
 
-       continue;
 
-     }
 
-     
 
-     *salt = trysalt;
 
-     break;
 
-   }
 
-   printf("built perfect hash table of size %ld\n", *blen);
 
-   /* free working memory */
 
-   free((void *)tabh);
 
-   free((void *)tabq);
 
- }
 
- /*
 
- ------------------------------------------------------------------------------
 
- Input/output type routines
 
- ------------------------------------------------------------------------------
 
- */
 
- /* get the list of keys */
 
- static void getkeys(keys, nkeys, textroot, keyroot, form)
 
- key      **keys;                                         /* list of all keys */
 
- ub4       *nkeys;                                          /* number of keys */
 
- reroot    *textroot;                          /* get space to store key text */
 
- reroot    *keyroot;                                    /* get space for keys */
 
- hashform  *form;                                          /* user directives */
 
- {
 
-   key  *mykey;
 
-   char *mytext;
 
-   mytext = (char *)renew(textroot);
 
-   *keys = 0;
 
-   *nkeys = 0;
 
-   while (fgets(mytext, MAXKEYLEN, stdin))
 
-   {
 
-     mykey = (key *)renew(keyroot);
 
-     if (form->mode == AB_HM)
 
-     {
 
-       sscanf(mytext, "%lx %lx ", &mykey->a_k, &mykey->b_k);
 
-     }
 
-     else if (form->mode == ABDEC_HM)
 
-     {
 
-       sscanf(mytext, "%ld %ld ", &mykey->a_k, &mykey->b_k);
 
-     }
 
-     else if (form->mode == HEX_HM)
 
-     {
 
-       sscanf(mytext, "%lx ", &mykey->hash_k);
 
-     }
 
-     else if (form->mode == DECIMAL_HM)
 
-     {
 
-       sscanf(mytext, "%ld ", &mykey->hash_k);
 
-     }
 
-     else
 
-     {
 
-       mykey->name_k = (ub1 *)mytext;
 
-       mytext = (char *)renew(textroot);
 
-       mykey->len_k  = (ub4)(strlen((char *)mykey->name_k)-1);
 
-     }
 
-     mykey->next_k = *keys;
 
-     *keys = mykey;
 
-     ++*nkeys;
 
-   }
 
-   redel(textroot, mytext);
 
- }
 
- /* make the .h file */
 
- static void make_h(blen, smax, nkeys, salt)
 
- ub4  blen;
 
- ub4  smax;
 
- ub4  nkeys;
 
- ub4  salt;
 
- {
 
-   FILE *f;
 
-   f = fopen("phash.h", "w");
 
-   fprintf(f, "/* Perfect hash definitions */\n");
 
-   fprintf(f, "#ifndef STANDARD\n");
 
-   fprintf(f, "#include \"standard.h\"\n");
 
-   fprintf(f, "#endif /* STANDARD */\n");
 
-   fprintf(f, "#ifndef PHASH\n");
 
-   fprintf(f, "#define PHASH\n");
 
-   fprintf(f, "\n");
 
-   if (blen > 0)
 
-   {
 
-     if (smax <= UB1MAXVAL+1 || blen >= USE_SCRAMBLE)
 
-       fprintf(f, "extern ub1 tab[];\n");
 
-     else
 
-     {
 
-       fprintf(f, "extern ub2 tab[];\n");
 
-       if (blen >= USE_SCRAMBLE)
 
-       {
 
- 	if (smax <= UB2MAXVAL+1)
 
- 	  fprintf(f, "extern ub2 scramble[];\n");
 
- 	else
 
- 	  fprintf(f, "extern ub4 scramble[];\n");
 
-       }
 
-     }
 
-     fprintf(f, "#define PHASHLEN 0x%lx  /* length of hash mapping table */\n",
 
- 	    blen);
 
-   }
 
-   fprintf(f, "#define PHASHNKEYS %ld  /* How many keys were hashed */\n",
 
-           nkeys);
 
-   fprintf(f, "#define PHASHRANGE %ld  /* Range any input might map to */\n",
 
-           smax);
 
-   fprintf(f, "#define PHASHSALT 0x%.8lx /* internal, initialize normal hash */\n",
 
-           salt*0x9e3779b9);
 
-   fprintf(f, "\n");
 
-   fprintf(f, "ub4 phash();\n");
 
-   fprintf(f, "\n");
 
-   fprintf(f, "#endif  /* PHASH */\n");
 
-   fprintf(f, "\n");
 
-   fclose(f);
 
- }
 
- /* make the .c file */
 
- static void make_c(tab, smax, blen, scramble, final, form)
 
- bstuff   *tab;                                         /* table indexed by b */
 
- ub4       smax;                                       /* range of scramble[] */
 
- ub4       blen;                                /* b in 0..blen-1, power of 2 */
 
- ub4      *scramble;                                    /* used in final hash */
 
- gencode  *final;                                  /* code for the final hash */
 
- hashform *form;                                           /* user directives */
 
- {
 
-   ub4   i;
 
-   FILE *f;
 
-   f = fopen("phash.c", "w");
 
-   fprintf(f, "/* table for the mapping for the perfect hash */\n");
 
-   fprintf(f, "#ifndef STANDARD\n");
 
-   fprintf(f, "#include \"standard.h\"\n");
 
-   fprintf(f, "#endif /* STANDARD */\n");
 
-   fprintf(f, "#ifndef PHASH\n");
 
-   fprintf(f, "#include \"phash.h\"\n");
 
-   fprintf(f, "#endif /* PHASH */\n");
 
-   fprintf(f, "#ifndef LOOKUPA\n");
 
-   fprintf(f, "#include \"lookupa.h\"\n");
 
-   fprintf(f, "#endif /* LOOKUPA */\n");
 
-   fprintf(f, "\n");
 
-   if (blen >= USE_SCRAMBLE)
 
-   {
 
-     fprintf(f, "/* A way to make the 1-byte values in tab bigger */\n");
 
-     if (smax > UB2MAXVAL+1)
 
-     {
 
-       fprintf(f, "ub4 scramble[] = {\n");
 
-       for (i=0; i<=UB1MAXVAL; i+=4)
 
-         fprintf(f, "0x%.8lx, 0x%.8lx, 0x%.8lx, 0x%.8lx,\n",
 
-                 scramble[i+0], scramble[i+1], scramble[i+2], scramble[i+3]);
 
-     }
 
-     else
 
-     {
 
-       fprintf(f, "ub2 scramble[] = {\n");
 
-       for (i=0; i<=UB1MAXVAL; i+=8)
 
-         fprintf(f, 
 
- "0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx,\n",
 
-                 scramble[i+0], scramble[i+1], scramble[i+2], scramble[i+3],
 
-                 scramble[i+4], scramble[i+5], scramble[i+6], scramble[i+7]);
 
-     }
 
-     fprintf(f, "};\n");
 
-     fprintf(f, "\n");
 
-   }
 
-   if (blen > 0)
 
-   {
 
-     fprintf(f, "/* small adjustments to _a_ to make values distinct */\n");
 
-     if (smax <= UB1MAXVAL+1 || blen >= USE_SCRAMBLE)
 
-       fprintf(f, "ub1 tab[] = {\n");
 
-     else
 
-       fprintf(f, "ub2 tab[] = {\n");
 
-     if (blen < 16)
 
-     {
 
-       for (i=0; i<blen; ++i) fprintf(f, "%3d,", scramble[tab[i].val_b]);
 
-     }
 
-     else if (blen <= 1024)
 
-     {
 
-       for (i=0; i<blen; i+=16)
 
- 	fprintf(f, "%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,\n",
 
- 		scramble[tab[i+0].val_b], scramble[tab[i+1].val_b], 
 
- 		scramble[tab[i+2].val_b], scramble[tab[i+3].val_b], 
 
- 		scramble[tab[i+4].val_b], scramble[tab[i+5].val_b], 
 
- 		scramble[tab[i+6].val_b], scramble[tab[i+7].val_b], 
 
- 		scramble[tab[i+8].val_b], scramble[tab[i+9].val_b], 
 
- 		scramble[tab[i+10].val_b], scramble[tab[i+11].val_b], 
 
- 		scramble[tab[i+12].val_b], scramble[tab[i+13].val_b], 
 
- 		scramble[tab[i+14].val_b], scramble[tab[i+15].val_b]); 
 
-     }
 
-     else if (blen < USE_SCRAMBLE)
 
-     {
 
-       for (i=0; i<blen; i+=8)
 
- 	fprintf(f, "%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,\n",
 
- 		scramble[tab[i+0].val_b], scramble[tab[i+1].val_b], 
 
- 		scramble[tab[i+2].val_b], scramble[tab[i+3].val_b], 
 
- 		scramble[tab[i+4].val_b], scramble[tab[i+5].val_b], 
 
- 		scramble[tab[i+6].val_b], scramble[tab[i+7].val_b]); 
 
-     }
 
-     else 
 
-     {
 
-       for (i=0; i<blen; i+=16)
 
- 	fprintf(f, "%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,\n",
 
- 		tab[i+0].val_b, tab[i+1].val_b, 
 
- 		tab[i+2].val_b, tab[i+3].val_b, 
 
- 		tab[i+4].val_b, tab[i+5].val_b, 
 
- 		tab[i+6].val_b, tab[i+7].val_b, 
 
- 		tab[i+8].val_b, tab[i+9].val_b, 
 
- 		tab[i+10].val_b, tab[i+11].val_b, 
 
- 		tab[i+12].val_b, tab[i+13].val_b, 
 
- 		tab[i+14].val_b, tab[i+15].val_b); 
 
-     }
 
-     fprintf(f, "};\n");
 
-     fprintf(f, "\n");
 
-   }
 
-   fprintf(f, "/* The hash function */\n");
 
-   switch(form->mode)
 
-   {
 
-   case NORMAL_HM:
 
-     fprintf(f, "ub4 phash(key, len)\n");
 
-     fprintf(f, "char *key;\n");
 
-     fprintf(f, "int   len;\n");
 
-     break;
 
-   case INLINE_HM:
 
-   case HEX_HM:
 
-   case DECIMAL_HM:
 
-     fprintf(f, "ub4 phash(val)\n");
 
-     fprintf(f, "ub4 val;\n");
 
-     break;
 
-   case AB_HM:
 
-   case ABDEC_HM:
 
-     fprintf(f, "ub4 phash(a,b)\n");
 
-     fprintf(f, "ub4 a;\n");
 
-     fprintf(f, "ub4 b;\n");
 
-     break;
 
-   }
 
-   fprintf(f, "{\n");
 
-   for (i=0; i<final->used; ++i)
 
-     fprintf(f, final->line[i]);
 
-   fprintf(f, "  return rsl;\n");
 
-   fprintf(f, "}\n");
 
-   fprintf(f, "\n");
 
-   fclose(f);
 
- }
 
- /*
 
- ------------------------------------------------------------------------------
 
- Read in the keys, find the hash, and write the .c and .h files
 
- ------------------------------------------------------------------------------
 
- */
 
- static void driver(form)
 
- hashform *form;                                           /* user directives */
 
- {
 
-   ub4       nkeys;                                         /* number of keys */
 
-   key      *keys;                                    /* head of list of keys */
 
-   bstuff   *tab;                                       /* table indexed by b */
 
-   ub4       smax;            /* scramble[] values in 0..smax-1, a power of 2 */
 
-   ub4       alen;                            /* a in 0..alen-1, a power of 2 */
 
-   ub4       blen;                            /* b in 0..blen-1, a power of 2 */
 
-   ub4       salt;                       /* a parameter to the hash function */
 
-   reroot   *textroot;                      /* MAXKEYLEN-character text lines */
 
-   reroot   *keyroot;                                       /* source of keys */
 
-   gencode   final;                                    /* code for final hash */
 
-   ub4       i;
 
-   ub4       scramble[SCRAMBLE_LEN];           /* used in final hash function */
 
-   char      buf[10][80];                        /* buffer for generated code */
 
-   char     *buf2[10];                             /* also for generated code */
 
-   /* set up memory sources */
 
-   textroot = remkroot((size_t)MAXKEYLEN);
 
-   keyroot  = remkroot(sizeof(key));
 
-   /* set up code for final hash */
 
-   final.line = buf2;
 
-   final.used = 0;
 
-   final.len  = 10;
 
-   for (i=0; i<10; ++i) final.line[i] = buf[i];
 
-   /* read in the list of keywords */
 
-   getkeys(&keys, &nkeys, textroot, keyroot, form);
 
-   printf("Read in %ld keys\n",nkeys);
 
-   /* find the hash */
 
-   findhash(&tab, &alen, &blen, &salt, &final, 
 
- 	   scramble, &smax, keys, nkeys, form);
 
-   /* generate the phash.h file */
 
-   make_h(blen, smax, nkeys, salt);
 
-   printf("Wrote phash.h\n");
 
-   /* generate the phash.c file */
 
-   make_c(tab, smax, blen, scramble, &final, form);
 
-   printf("Wrote phash.c\n");
 
-   /* clean up memory sources */
 
-   refree(textroot);
 
-   refree(keyroot);
 
-   free((void *)tab);
 
-   printf("Cleaned up\n");
 
- }
 
- /* Describe how to use this utility */
 
- static void usage_error()
 
- {
 
-   printf("Usage: perfect [-{NnIiHhDdAaBb}{MmPp}{FfSs}] < key.txt \n");
 
-   printf("The input is a list of keys, one key per line.\n");
 
-   printf("Only one of NnIiHhDdAa and one of MmPp may be specified.\n");
 
-   printf("  N,n: normal mode, key is any string string (default).\n");
 
-   printf("  I,i: initial hash for ASCII char strings.\n");
 
-   printf("The initial hash must be\n");
 
-   printf("  hash = PHASHSALT;\n");
 
-   printf("  for (i=0; i<keylength; ++i) {\n");
 
-   printf("    hash = (hash ^ key[i]) + ((hash<<26)+(hash>>6));\n");
 
-   printf("  }\n");
 
-   printf("Note that this can be inlined in any user loop that walks\n");
 
-   printf("through the key anyways, eliminating the loop overhead.\n");
 
-   printf("  H,h: Keys are 4-byte integers in hex in this format:\n");
 
-   printf("ffffffff\n");
 
-   printf("This is good for optimizing switch statement compilation.\n");
 
-   printf("  D,d: Same as H,h, except in decimal not hexidecimal\n");
 
-   printf("  A,a: An (A,B) pair is supplied in hex in this format:\n");
 
-   printf("aaa bbb\n");
 
-   printf("  B,b: Same as A,a, except in decimal not hexidecimal\n");
 
-   printf("This mode does nothing but find the values of tab[].\n");
 
-   printf("*A* must be less than the total number of keys.\n");
 
-   printf("  M,m: Minimal perfect hash.  Hash will be in 0..nkeys-1 (default)\n");
 
-   printf("  P,p: Perfect hash.  Hash will be in 0..n-1, where n >= nkeys\n");
 
-   printf("and n is a power of 2.  Will probably use a smaller tab[].");
 
-   printf("  F,f: Fast mode.  Generate the perfect hash fast.\n");
 
-   printf("  S,s: Slow mode.  Spend time finding a good perfect hash.\n");
 
-   exit(SUCCESS);
 
- }
 
- /* Interpret arguments and call the driver */
 
- /* See usage_error for the expected arguments */
 
- int main(argc, argv)
 
- int    argc;
 
- char **argv;
 
- {
 
-   int      mode_given = FALSE;
 
-   int      minimal_given = FALSE;
 
-   int      speed_given = FALSE;
 
-   hashform form;
 
-   char    *c;
 
-   /* default behavior */
 
-   form.mode = NORMAL_HM;
 
-   form.hashtype = STRING_HT;
 
-   form.perfect = MINIMAL_HP;
 
-   form.speed = SLOW_HS;
 
-   /* let the user override the default behavior */
 
-   switch (argc)
 
-   {
 
-   case 1:
 
-     break;
 
-   case 2:
 
-     if (argv[1][0] != '-')
 
-     {
 
-       usage_error();
 
-       break;
 
-     }
 
-     for (c = &argv[1][1]; *c != '\0'; ++c) switch(*c)
 
-     {
 
-     case 'n': case 'N':
 
-     case 'i': case 'I':
 
-     case 'h': case 'H':
 
-     case 'd': case 'D':
 
-     case 'a': case 'A':
 
-     case 'b': case 'B':
 
-       if (mode_given == TRUE) 
 
- 	usage_error();
 
-       switch(*c)
 
-       {
 
-       case 'n': case 'N':
 
- 	form.mode = NORMAL_HM;  form.hashtype = STRING_HT; break;
 
-       case 'i': case 'I':
 
- 	form.mode = INLINE_HM;  form.hashtype = STRING_HT; break;
 
-       case 'h': case 'H':
 
- 	form.mode = HEX_HM;     form.hashtype = INT_HT; break;
 
-       case 'd': case 'D':
 
- 	form.mode = DECIMAL_HM; form.hashtype = INT_HT; break;
 
-       case 'a': case 'A':
 
- 	form.mode = AB_HM;      form.hashtype = AB_HT; break;
 
-       case 'b': case 'B':
 
- 	form.mode = ABDEC_HM;   form.hashtype = AB_HT; break;
 
-       }
 
-       mode_given = TRUE;
 
-       break;
 
-     case 'm': case 'M':
 
-     case 'p': case 'P':
 
-       if (minimal_given == TRUE)
 
- 	usage_error();
 
-       switch(*c)
 
-       {
 
-       case 'p': case 'P':
 
- 	form.perfect = NORMAL_HP; break;
 
-       case 'm': case 'M':
 
- 	form.perfect = MINIMAL_HP; break;
 
-       }
 
-       minimal_given = TRUE;
 
-       break;
 
-     case 'f': case 'F':
 
-     case 's': case 'S':
 
-       if (speed_given == TRUE)
 
- 	usage_error();
 
-       switch(*c)
 
-       {
 
-       case 'f': case 'F':
 
- 	form.speed = FAST_HS; break;
 
-       case 's': case 'S':
 
- 	form.speed = SLOW_HS; break;
 
-       }
 
-       speed_given = TRUE;
 
-       break;
 
-     default:
 
-       usage_error();
 
-     }
 
-     break;
 
-   default:
 
-     usage_error();
 
-   }
 
-   /* Generate the [minimal] perfect hash */
 
-   driver(&form);
 
-   return SUCCESS;
 
- }
 
 
  |