| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206 |
- module xxhash where
/**
 * XXH32: the 32-bit xxHash digest of a finite byte sequence.
 *
 * First argument:  the L input bytes.
 * Second argument: the 32-bit seed.
 *
 * The input is consumed as whole 16-byte stripes, then whole 4-byte
 * lanes, then the remaining single bytes; the resulting accumulator
 * is finished off with XXH32_avalanche.
 */
XXH32 : {L} (fin L) => [L][8] -> [32] -> [32]
XXH32 input seed = XXH32_avalanche finalAcc
  where
    (stripeBytes # laneBytes # restBytes) = input

    stripes  = split stripeBytes : [L/16][16][8]
    lanes    = split laneBytes   : [(L%16)/4][4][8]
    leftover = restBytes         : [L%4][8]

    // Four-lane state after every full stripe has been absorbed.
    laneAccs = foldl XXH32_rounds (XXH32_init seed) stripes

    // Inputs shorter than one stripe do not use the lane accumulators.
    startAcc = if (`L : Integer) < 16
               then seed + PRIME32_5
               else XXH32_converge laneAccs

    // The input length, reduced modulo 2^^32, is added in.
    lengthAcc = `(L % 2^^32) + startAcc

    laneAcc  = foldl XXH32_digest4 lengthAcc lanes
    finalAcc = foldl XXH32_digest1 laneAcc leftover
/**
 * XXH64: the 64-bit xxHash digest of a finite byte sequence.
 *
 * First argument:  the L input bytes.
 * Second argument: the 64-bit seed.
 *
 * The input is consumed as whole 32-byte stripes, then whole 8-byte
 * lanes, then whole 4-byte lanes, then the remaining single bytes;
 * the resulting accumulator is finished off with XXH64_avalanche.
 */
XXH64 : {L} (fin L) => [L][8] -> [64] -> [64]
XXH64 input seed = XXH64_avalanche finalAcc
  where
    (stripeBytes # lane8Bytes # lane4Bytes # restBytes) = input

    stripes  = split stripeBytes : [L/32][32][8]
    lanes8   = split lane8Bytes  : [(L%32)/8][8][8]
    lanes4   = split lane4Bytes  : [(L%8)/4][4][8]
    leftover = restBytes         : [L%4][8]

    // Four-lane state after every full stripe has been absorbed.
    laneAccs = foldl XXH64_rounds (XXH64_init seed) stripes

    // Inputs shorter than one stripe do not use the lane accumulators.
    startAcc = if (`L : Integer) < 32
               then seed + PRIME64_5
               else XXH64_converge laneAccs

    // The input length, reduced modulo 2^^64, is added in.
    lengthAcc = `(L % 2^^64) + startAcc

    lane8Acc = foldl XXH64_digest8 lengthAcc lanes8
    lane4Acc = foldl XXH64_digest4 lane8Acc lanes4
    finalAcc = foldl XXH64_digest1 lane4Acc leftover
- private
- //Utility functions
/**
 * Reads a sequence of bytes as one little-endian word: the first
 * byte of the sequence ends up in the least significant position of
 * the result.
 */
toLE littleEndian = join bigEndian
  where bigEndian = reverse littleEndian
- //32-bit xxHash helper functions
// The five 32-bit constants used by the XXH32 helper functions.
PRIME32_1 : [32]
PRIME32_1 = 0x9E3779B1

PRIME32_2 : [32]
PRIME32_2 = 0x85EBCA77

PRIME32_3 : [32]
PRIME32_3 = 0xC2B2AE3D

PRIME32_4 : [32]
PRIME32_4 = 0x27D4EB2F

PRIME32_5 : [32]
PRIME32_5 = 0x165667B1
/**
 * Cross-check of the PRIME32 constants: each one, written above in
 * hexadecimal, must equal the binary literal listed here.
 */
property PRIME32s_as_bits_correct =
  [ PRIME32_1, PRIME32_2, PRIME32_3, PRIME32_4, PRIME32_5 ] ==
  [ 0b10011110001101110111100110110001
  , 0b10000101111010111100101001110111
  , 0b11000010101100101010111000111101
  , 0b00100111110101001110101100101111
  , 0b00010110010101100110011110110001
  ]
/**
 * Derives the four starting lane accumulators of XXH32 from the seed.
 */
XXH32_init : [32] -> [4][32]
XXH32_init seed =
  [ seed + PRIME32_1 + PRIME32_2
  , seed + PRIME32_2
  , seed                          // third lane starts at the bare seed
  , seed - PRIME32_1
  ]
/**
 * One XXH32 round for a single lane: the lane word is scaled by
 * PRIME32_2 and added to the accumulator, the sum is rotated left by
 * 13 bits, and the result is scaled by PRIME32_1.
 */
XXH32_round : [32] -> [32] -> [32]
XXH32_round accN laneN = rotated * PRIME32_1
  where absorbed = accN + laneN * PRIME32_2
        rotated  = absorbed <<< 13
/**
 * Applies XXH32_round to a whole 16-byte stripe: the stripe is cut
 * into four 4-byte lanes, each lane is read little-endian, and lane i
 * updates accumulator i.
 */
XXH32_rounds : [4][32] -> [16][8] -> [4][32]
XXH32_rounds accs stripe = zipWith XXH32_round accs laneWords
  where laneWords = map toLE (split stripe) : [4][32]
/**
 * Collapses the four lane accumulators into one 32-bit value by
 * rotating them left by 1, 7, 12 and 18 bits respectively and adding
 * the results.
 */
XXH32_converge : [4][32] -> [32]
XXH32_converge [lane1, lane2, lane3, lane4] = rot1 + rot7 + rot12 + rot18
  where rot1  = lane1 <<< 1
        rot7  = lane2 <<< 7
        rot12 = lane3 <<< 12
        rot18 = lane4 <<< 18
/**
 * Absorbs one four-byte lane into the XXH32 accumulator. The lane is
 * read little-endian.
 */
XXH32_digest4 : [32] -> [4][8] -> [32]
XXH32_digest4 acc lane = (absorbed <<< 17) * PRIME32_4
  where word     = toLE lane : [32]
        absorbed = acc + word * PRIME32_3
/**
 * Absorbs one single-byte lane into the XXH32 accumulator. The byte
 * is zero-extended to 32 bits before use.
 */
XXH32_digest1 : [32] -> [8] -> [32]
XXH32_digest1 acc lane = (absorbed <<< 11) * PRIME32_1
  where widened  = (0 : [24]) # lane
        absorbed = acc + widened * PRIME32_5
/**
 * Final mixing step of XXH32, intended to let every input bit
 * influence every bit of the digest. It alternates xor-with-right-shift
 * (by 15, 13, then 16 bits) with multiplication by PRIME32_2 and
 * PRIME32_3.
 */
XXH32_avalanche : [32] -> [32]
XXH32_avalanche acc0 = scaledP3 ^ (scaledP3 >> 16)
  where
    folded15 = acc0 ^ (acc0 >> 15)
    scaledP2 = folded15 * PRIME32_2
    folded13 = scaledP2 ^ (scaledP2 >> 13)
    scaledP3 = folded13 * PRIME32_3
- //64-bit xxHash helper functions
// The five 64-bit constants used by the XXH64 helper functions.
PRIME64_1 : [64]
PRIME64_1 = 0x9E3779B185EBCA87

PRIME64_2 : [64]
PRIME64_2 = 0xC2B2AE3D27D4EB4F

PRIME64_3 : [64]
PRIME64_3 = 0x165667B19E3779F9

PRIME64_4 : [64]
PRIME64_4 = 0x85EBCA77C2B2AE63

PRIME64_5 : [64]
PRIME64_5 = 0x27D4EB2F165667C5
/**
 * Cross-check of the PRIME64 constants: each one, written above in
 * hexadecimal, must equal the binary literal listed here.
 */
property PRIME64s_as_bits_correct =
  [ PRIME64_1, PRIME64_2, PRIME64_3, PRIME64_4, PRIME64_5 ] ==
  [ 0b1001111000110111011110011011000110000101111010111100101010000111
  , 0b1100001010110010101011100011110100100111110101001110101101001111
  , 0b0001011001010110011001111011000110011110001101110111100111111001
  , 0b1000010111101011110010100111011111000010101100101010111001100011
  , 0b0010011111010100111010110010111100010110010101100110011111000101
  ]
/**
 * Derives the four starting lane accumulators of XXH64 from the seed.
 */
XXH64_init : [64] -> [4][64]
XXH64_init seed =
  [ seed + PRIME64_1 + PRIME64_2
  , seed + PRIME64_2
  , seed                          // third lane starts at the bare seed
  , seed - PRIME64_1
  ]
/**
 * One XXH64 round for a single lane: the lane word is scaled by
 * PRIME64_2 and added to the accumulator, the sum is rotated left by
 * 31 bits, and the result is scaled by PRIME64_1.
 */
XXH64_round : [64] -> [64] -> [64]
XXH64_round accN laneN = rotated * PRIME64_1
  where absorbed = accN + laneN * PRIME64_2
        rotated  = absorbed <<< 31
/**
 * Applies XXH64_round to a whole 32-byte stripe: the stripe is cut
 * into four 8-byte lanes, each lane is read little-endian, and lane i
 * updates accumulator i.
 */
XXH64_rounds : [4][64] -> [32][8] -> [4][64]
XXH64_rounds accs stripe = zipWith XXH64_round accs laneWords
  where laneWords = map toLE (split stripe) : [4][64]
/**
 * Helper for XXH64_converge: folds one lane accumulator into the
 * running value. The lane accumulator is first passed through
 * XXH64_round with a zero accumulator, xor-ed into the running value,
 * and the result is scaled by PRIME64_1 and offset by PRIME64_4.
 */
mergeAccumulator : [64] -> [64] -> [64]
mergeAccumulator acc accN = combined * PRIME64_1 + PRIME64_4
  where scrambled = XXH64_round 0 accN
        combined  = acc ^ scrambled
/**
 * Collapses the four lane accumulators into one 64-bit value. The
 * starting value is the sum of the accumulators rotated left by 1, 7,
 * 12 and 18 bits; each accumulator is then merged in, first to last,
 * with mergeAccumulator.
 */
XXH64_converge : [4][64] -> [64]
XXH64_converge [lane1, lane2, lane3, lane4] = merged4
  where
    rotatedSum = (lane1 <<< 1) + (lane2 <<< 7) + (lane3 <<< 12) + (lane4 <<< 18)
    merged1    = mergeAccumulator rotatedSum lane1
    merged2    = mergeAccumulator merged1 lane2
    merged3    = mergeAccumulator merged2 lane3
    merged4    = mergeAccumulator merged3 lane4
/**
 * Absorbs one eight-byte lane into the XXH64 accumulator. The lane is
 * read little-endian and passed through XXH64_round with a zero
 * accumulator before being xor-ed in.
 */
XXH64_digest8 : [64] -> [8][8] -> [64]
XXH64_digest8 acc lane = (combined <<< 27) * PRIME64_1 + PRIME64_4
  where scrambled = XXH64_round 0 (toLE lane)
        combined  = acc ^ scrambled
/**
 * Absorbs one four-byte lane into the XXH64 accumulator. The lane is
 * read little-endian and zero-extended to 64 bits before use.
 */
XXH64_digest4 : [64] -> [4][8] -> [64]
XXH64_digest4 acc lane = (combined <<< 23) * PRIME64_2 + PRIME64_3
  where widened  = (0 : [32]) # toLE lane
        combined = acc ^ (widened * PRIME64_1)
/**
 * Absorbs one single-byte lane into the XXH64 accumulator. The byte
 * is zero-extended to 64 bits before use.
 */
XXH64_digest1 : [64] -> [8] -> [64]
XXH64_digest1 acc lane = (combined <<< 11) * PRIME64_1
  where widened  = (0 : [56]) # lane
        combined = acc ^ (widened * PRIME64_5)
/**
 * Final mixing step of XXH64, intended to let every input bit
 * influence every bit of the digest. It alternates xor-with-right-shift
 * (by 33, 29, then 32 bits) with multiplication by PRIME64_2 and
 * PRIME64_3.
 */
XXH64_avalanche : [64] -> [64]
XXH64_avalanche acc0 = scaledP3 ^ (scaledP3 >> 32)
  where
    folded33 = acc0 ^ (acc0 >> 33)
    scaledP2 = folded33 * PRIME64_2
    folded29 = scaledP2 ^ (scaledP2 >> 29)
    scaledP3 = folded29 * PRIME64_3
|