File size: 1,970 Bytes
59ebac7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
"""helpers for compute measures: hash, time benchmarks"""
from pathlib import Path


def hash_calculate(arr: object, debug: bool = False) -> bytes:
    """
    Return the base64-encoded SHA-256 digest of the input variable.

    Handled input types:
        - ``numpy.ndarray``: the raw element buffer is hashed in C order
          (only the bytes are hashed, not the shape or dtype).
        - ``dict``: passed through the project ``serialize`` helper, dumped to
          JSON with sorted keys and UTF-8 encoded before hashing.
        - ``str``: UTF-8 encoded before hashing.
        - ``bytes``: hashed as-is.

    Args:
        arr: input variable (typically a numpy array)
        debug: logging debug argument, forwarded to ``setup_logging``

    Returns:
        bytes: base64-encoded SHA-256 digest of the input variable

    Raises:
        ValueError: if the type of ``arr`` is not one of the handled types

    """
    import hashlib
    import numpy as np
    from base64 import b64encode

    from src.utilities.utilities import setup_logging
    # NOTE(review): the returned logger is no longer needed in this function;
    # the call is kept in case setup_logging configures logging as a side
    # effect -- confirm against its implementation before removing it.
    setup_logging(debug)

    if isinstance(arr, np.ndarray):
        # hashlib only accepts C-contiguous buffers, so sliced or transposed
        # arrays used to fail here. ascontiguousarray leaves the bytes of an
        # already-contiguous array unchanged, so existing hashes are preserved.
        hash_fn = hashlib.sha256(np.ascontiguousarray(arr).data)
    elif isinstance(arr, dict):
        import json
        from src.utilities.serialize import serialize

        serialized = serialize(arr)
        # sort_keys makes the digest independent of dict insertion order
        variable_to_hash = json.dumps(serialized, sort_keys=True).encode('utf-8')
        hash_fn = hashlib.sha256(variable_to_hash)
    elif isinstance(arr, str):
        # hashlib never accepts str objects: encode up front instead of
        # relying on a TypeError that was raised (and logged) on every call.
        hash_fn = hashlib.sha256(arr.encode('utf-8'))
    elif isinstance(arr, bytes):
        hash_fn = hashlib.sha256(arr)
    else:
        raise ValueError(f"variable 'arr':{arr} not yet handled.")
    return b64encode(hash_fn.digest())


def sha256sum(filename: "Path | str") -> str:
    """
    Return the SHA-256 hex digest of the content of a file.

    The file is read in fixed-size chunks, so memory use stays bounded for
    large files and empty files are supported (memory-mapping a zero-length
    file raises ValueError, and the ``prot`` mmap argument is Unix-only).

    Args:
        filename: path of the file to hash

    Returns:
        str: hexadecimal SHA-256 digest of the file content

    Raises:
        OSError: if the file cannot be opened or read

    """
    import hashlib

    chunk_size = 1024 * 1024  # 1 MiB per read
    h = hashlib.sha256()
    with open(filename, 'rb') as f:
        # iter() with a sentinel stops at the first empty read, i.e. at EOF
        for chunk in iter(lambda: f.read(chunk_size), b''):
            h.update(chunk)
    return h.hexdigest()


def perf_counter() -> float:
    """
    Read the performance counter used for benchmarking.

    Thin wrapper around ``time.perf_counter``; the difference between two
    successive readings gives the elapsed time between them.

    Returns:
        float: value of the performance counter at execution time

    """
    from time import perf_counter as _clock

    reading = _clock()
    return reading