xxhash.cry 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206
  1. module xxhash where
  2. /**
  3. * The 32-bit variant of xxHash. The first argument is the sequence
  4. * of L bytes to hash. The second argument is a seed value.
  5. */
  6. XXH32 : {L} (fin L) => [L][8] -> [32] -> [32]
  7. XXH32 input seed = XXH32_avalanche acc1
  8. where (stripes16 # stripes4 # stripes1) = input
  9. accR = foldl XXH32_rounds (XXH32_init seed) (split stripes16 : [L/16][16][8])
  10. accL = `(L % 2^^32) + if (`L:Integer) < 16
  11. then seed + PRIME32_5
  12. else XXH32_converge accR
  13. acc4 = foldl XXH32_digest4 accL (split stripes4 : [(L%16)/4][4][8])
  14. acc1 = foldl XXH32_digest1 acc4 (stripes1 : [L%4][8])
  15. /**
  16. * The 64-bit variant of xxHash. The first argument is the sequence
  17. * of L bytes to hash. The second argument is a seed value.
  18. */
  19. XXH64 : {L} (fin L) => [L][8] -> [64] -> [64]
  20. XXH64 input seed = XXH64_avalanche acc1
  21. where (stripes32 # stripes8 # stripes4 # stripes1) = input
  22. accR = foldl XXH64_rounds (XXH64_init seed) (split stripes32 : [L/32][32][8])
  23. accL = `(L % 2^^64) + if (`L:Integer) < 32
  24. then seed + PRIME64_5
  25. else XXH64_converge accR
  26. acc8 = foldl XXH64_digest8 accL (split stripes8 : [(L%32)/8][8][8])
  27. acc4 = foldl XXH64_digest4 acc8 (split stripes4 : [(L%8)/4][4][8])
  28. acc1 = foldl XXH64_digest1 acc4 (stripes1 : [L%4][8])
  29. private
  30. //Utility functions
  31. /**
  32. * Combines a sequence of bytes into a word using the little-endian
  33. * convention.
  34. */
  35. toLE bytes = join (reverse bytes)
  36. //32-bit xxHash helper functions
  37. //32-bit prime number constants
  38. PRIME32_1 = 0x9E3779B1 : [32]
  39. PRIME32_2 = 0x85EBCA77 : [32]
  40. PRIME32_3 = 0xC2B2AE3D : [32]
  41. PRIME32_4 = 0x27D4EB2F : [32]
  42. PRIME32_5 = 0x165667B1 : [32]
  43. /**
  44. * The property shows that the hexadecimal representation of the
  45. * PRIME32 constants is the same as the binary representation.
  46. */
  47. property PRIME32s_as_bits_correct =
  48. (PRIME32_1 == 0b10011110001101110111100110110001) /\
  49. (PRIME32_2 == 0b10000101111010111100101001110111) /\
  50. (PRIME32_3 == 0b11000010101100101010111000111101) /\
  51. (PRIME32_4 == 0b00100111110101001110101100101111) /\
  52. (PRIME32_5 == 0b00010110010101100110011110110001)
  53. /**
  54. * This function initializes the four internal accumulators of XXH32.
  55. */
  56. XXH32_init : [32] -> [4][32]
  57. XXH32_init seed = [acc1, acc2, acc3, acc4]
  58. where acc1 = seed + PRIME32_1 + PRIME32_2
  59. acc2 = seed + PRIME32_2
  60. acc3 = seed + 0
  61. acc4 = seed - PRIME32_1
  62. /**
  63. * This processes a single lane of the main round function of XXH32.
  64. */
  65. XXH32_round : [32] -> [32] -> [32]
  66. XXH32_round accN laneN = ((accN + laneN * PRIME32_2) <<< 13) * PRIME32_1
  67. /**
  68. * This is the main round function of XXH32 and processes a stripe,
  69. * i.e. 4 lanes with 4 bytes each.
  70. */
  71. XXH32_rounds : [4][32] -> [16][8] -> [4][32]
  72. XXH32_rounds accs stripe =
  73. [ XXH32_round accN (toLE laneN) | accN <- accs | laneN <- split stripe ]
  74. /**
  75. * This function combines the four lane accumulators into a single
  76. * 32-bit value.
  77. */
  78. XXH32_converge : [4][32] -> [32]
  79. XXH32_converge [acc1, acc2, acc3, acc4] =
  80. (acc1 <<< 1) + (acc2 <<< 7) + (acc3 <<< 12) + (acc4 <<< 18)
  81. /**
  82. * This function digests a four byte lane
  83. */
  84. XXH32_digest4 : [32] -> [4][8] -> [32]
  85. XXH32_digest4 acc lane = ((acc + toLE lane * PRIME32_3) <<< 17) * PRIME32_4
  86. /**
  87. * This function digests a single byte lane
  88. */
  89. XXH32_digest1 : [32] -> [8] -> [32]
  90. XXH32_digest1 acc lane = ((acc + (0 # lane) * PRIME32_5) <<< 11) * PRIME32_1
  91. /**
  92. * This function ensures that all input bits have a chance to impact
  93. * any bit in the output digest, resulting in an unbiased
  94. * distribution.
  95. */
  96. XXH32_avalanche : [32] -> [32]
  97. XXH32_avalanche acc0 = acc5
  98. where acc1 = acc0 ^ (acc0 >> 15)
  99. acc2 = acc1 * PRIME32_2
  100. acc3 = acc2 ^ (acc2 >> 13)
  101. acc4 = acc3 * PRIME32_3
  102. acc5 = acc4 ^ (acc4 >> 16)
  103. //64-bit xxHash helper functions
  104. //64-bit prime number constants
  105. PRIME64_1 = 0x9E3779B185EBCA87 : [64]
  106. PRIME64_2 = 0xC2B2AE3D27D4EB4F : [64]
  107. PRIME64_3 = 0x165667B19E3779F9 : [64]
  108. PRIME64_4 = 0x85EBCA77C2B2AE63 : [64]
  109. PRIME64_5 = 0x27D4EB2F165667C5 : [64]
  110. /**
  111. * The property shows that the hexadecimal representation of the
  112. * PRIME64 constants is the same as the binary representation.
  113. */
  114. property PRIME64s_as_bits_correct =
  115. (PRIME64_1 == 0b1001111000110111011110011011000110000101111010111100101010000111) /\
  116. (PRIME64_2 == 0b1100001010110010101011100011110100100111110101001110101101001111) /\
  117. (PRIME64_3 == 0b0001011001010110011001111011000110011110001101110111100111111001) /\
  118. (PRIME64_4 == 0b1000010111101011110010100111011111000010101100101010111001100011) /\
  119. (PRIME64_5 == 0b0010011111010100111010110010111100010110010101100110011111000101)
  120. /**
  121. * This function initializes the four internal accumulators of XXH64.
  122. */
  123. XXH64_init : [64] -> [4][64]
  124. XXH64_init seed = [acc1, acc2, acc3, acc4]
  125. where acc1 = seed + PRIME64_1 + PRIME64_2
  126. acc2 = seed + PRIME64_2
  127. acc3 = seed + 0
  128. acc4 = seed - PRIME64_1
  129. /**
  130. * This processes a single lane of the main round function of XXH64.
  131. */
  132. XXH64_round : [64] -> [64] -> [64]
  133. XXH64_round accN laneN = ((accN + laneN * PRIME64_2) <<< 31) * PRIME64_1
  134. /**
  135. * This is the main round function of XXH64 and processes a stripe,
  136. * i.e. 4 lanes with 8 bytes each.
  137. */
  138. XXH64_rounds : [4][64] -> [32][8] -> [4][64]
  139. XXH64_rounds accs stripe =
  140. [ XXH64_round accN (toLE laneN) | accN <- accs | laneN <- split stripe ]
  141. /**
  142. * This is a helper function, used to merge the four lane accumulators.
  143. */
  144. mergeAccumulator : [64] -> [64] -> [64]
  145. mergeAccumulator acc accN = (acc ^ XXH64_round 0 accN) * PRIME64_1 + PRIME64_4
  146. /**
  147. * This function combines the four lane accumulators into a single
  148. * 64-bit value.
  149. */
  150. XXH64_converge : [4][64] -> [64]
  151. XXH64_converge [acc1, acc2, acc3, acc4] =
  152. foldl mergeAccumulator ((acc1 <<< 1) + (acc2 <<< 7) + (acc3 <<< 12) + (acc4 <<< 18)) [acc1, acc2, acc3, acc4]
  153. /**
  154. * This function digests an eight byte lane
  155. */
  156. XXH64_digest8 : [64] -> [8][8] -> [64]
  157. XXH64_digest8 acc lane = ((acc ^ XXH64_round 0 (toLE lane)) <<< 27) * PRIME64_1 + PRIME64_4
  158. /**
  159. * This function digests a four byte lane
  160. */
  161. XXH64_digest4 : [64] -> [4][8] -> [64]
  162. XXH64_digest4 acc lane = ((acc ^ (0 # toLE lane) * PRIME64_1) <<< 23) * PRIME64_2 + PRIME64_3
  163. /**
  164. * This function digests a single byte lane
  165. */
  166. XXH64_digest1 : [64] -> [8] -> [64]
  167. XXH64_digest1 acc lane = ((acc ^ (0 # lane) * PRIME64_5) <<< 11) * PRIME64_1
  168. /**
  169. * This function ensures that all input bits have a chance to impact
  170. * any bit in the output digest, resulting in an unbiased
  171. * distribution.
  172. */
  173. XXH64_avalanche : [64] -> [64]
  174. XXH64_avalanche acc0 = acc5
  175. where acc1 = acc0 ^ (acc0 >> 33)
  176. acc2 = acc1 * PRIME64_2
  177. acc3 = acc2 ^ (acc2 >> 29)
  178. acc4 = acc3 * PRIME64_3
  179. acc5 = acc4 ^ (acc4 >> 32)