File size: 2,456 Bytes
d1ed09d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import binascii
import hashlib

# Registry of the hash functions available in this environment.  Each entry
# is a callable taking a bytes-like object and returning a bytes digest;
# hash_buffer() tries the entries in list order.
hashers = []  # In decreasing performance order


# Timings on a largish array:
# - CityHash is 2x faster than MurmurHash
# - xxHash is slightly slower than CityHash
# - MurmurHash is 8x faster than SHA1
# - SHA1 is significantly faster than all other hashlib algorithms

def _version_tuple(version):
    """
    Convert a dotted version string into a tuple of ints for comparison.

    Only the leading ASCII digits of each dot-separated component are used,
    and parsing stops at the first component that has none, so "0.2.2"
    gives (0, 2, 2), "1.0.0rc1" gives (1, 0, 0) and "0.2.2.post1" gives
    (0, 2, 2).
    """
    parts = []
    for component in version.split("."):
        digits = ""
        for char in component:
            if not "0" <= char <= "9":
                break
            digits += char
        if not digits:
            break
        parts.append(int(digits))
    return tuple(parts)


try:
    import cityhash  # `python -m pip install cityhash`
except ImportError:
    pass
else:
    # CityHash disabled unless the reference leak in
    # https://github.com/escherba/python-cityhash/pull/16
    # is fixed.
    # The version is compared numerically, component by component: comparing
    # the raw strings is lexicographic and would order "0.10.0" before
    # "0.2.2", wrongly disabling a release that contains the fix.
    if _version_tuple(cityhash.__version__) >= (0, 2, 2):

        def _hash_cityhash(buf):
            """
            Produce a 16-bytes hash of *buf* using CityHash.

            The integer returned by CityHash128 is serialized as 16
            little-endian bytes.
            """
            h = cityhash.CityHash128(buf)
            return h.to_bytes(16, "little")

        hashers.append(_hash_cityhash)

try:
    import xxhash  # `python -m pip install xxhash`
except ImportError:
    # xxHash is optional; without it the remaining hashers are used.
    pass
else:

    def _hash_xxhash(buf):
        """
        Return the 8-byte xxHash (xxh64) digest of *buf*.
        """
        state = xxhash.xxh64(buf)
        return state.digest()

    hashers.append(_hash_xxhash)

try:
    import mmh3  # `python -m pip install mmh3`
except ImportError:
    # MurmurHash is optional; without it the remaining hashers are used.
    pass
else:

    def _hash_murmurhash(buf):
        """
        Return the 16-byte MurmurHash digest of *buf*.
        """
        digest = mmh3.hash_bytes(buf)
        return digest

    hashers.append(_hash_murmurhash)


def _hash_sha1(buf):
    """
    Return the 20-byte SHA1 digest of *buf*.
    """
    state = hashlib.sha1(buf)
    return state.digest()


# hashlib is part of the standard library, so SHA1 is registered
# unconditionally; being appended last, it is tried after every optional
# hasher registered above.
hashers.append(_hash_sha1)


def hash_buffer(buf, hasher=None):
    """
    Compute a fixed-length bytes digest of a bytes-like (buffer-compatible)
    object.  The digest is of good quality but is not cryptographically
    secure.

    If *hasher* is given, it is tried first.  Otherwise, or if it rejects
    *buf* with a TypeError or OverflowError, the registered hashers are
    tried in order (fastest first) and the first successful result is
    returned.  A TypeError is raised when no hasher accepts *buf*.
    """

    def _candidates():
        # Lazily yield the caller's choice (if any) ahead of the registry.
        if hasher is not None:
            yield hasher
        yield from hashers

    for candidate in _candidates():
        try:
            return candidate(buf)
        except (TypeError, OverflowError):
            # Some hash libraries may have overly-strict type checking,
            # not accepting all buffers; move on to the next candidate.
            continue
    raise TypeError(f"unsupported type for hashing: {type(buf)}")


def hash_buffer_hex(buf, hasher=None):
    """
    Hex-encoded variant of hash_buffer: the digest is returned as a str
    of hexadecimal digits instead of raw bytes.
    """
    digest = hash_buffer(buf, hasher)
    return binascii.hexlify(digest).decode()