File size: 1,381 Bytes
db5855f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import json
import glob
import mmap
import sys


def get_notebooks(path: str):
    """Return the notebook files found one directory level below *path*.

    A file matches when it sits in an immediate subdirectory of *path*, its
    name starts with a digit and it ends in ``.ipynb``. The result is the
    list produced by ``glob.glob``; no ordering is imposed here.
    """
    pattern = f"{path}/*/[0-9]*.ipynb"
    return glob.glob(pattern)


def get_tags(path: str):
    """Load and return the parsed JSON document stored at *path*.

    The file is read as UTF-8 (the encoding JSON is specified in) and is
    closed before returning, instead of leaking the handle until garbage
    collection.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    with open(path, encoding="utf-8") as file:
        return json.load(file)


def find_tags_for_notebook(notebook_path: str, tags: dict):
    """Return the tags whose keywords occur in the file at *notebook_path*.

    Args:
        notebook_path: path of the file to scan; it is searched as raw bytes.
        tags: mapping of tag name to an iterable of keyword strings.

    Returns:
        A list of tag names, in the iteration order of *tags*. A tag is
        included once if at least one of its keywords (encoded as UTF-8) is
        found anywhere in the file. An empty file yields an empty list.

    Raises:
        OSError: if the file cannot be opened.
    """
    # Only the file descriptor is used, so open in binary mode and skip the
    # text-decoding layer.
    with open(notebook_path, "rb") as file:
        # mmap raises ValueError for a zero-length file; seeking to the end
        # (whence=2) returns the file size, and an empty file cannot contain
        # any keyword.
        if file.seek(0, 2) == 0:
            return []
        # The context manager unmaps the file as soon as the search is done.
        with mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ) as contents:
            return [
                tag
                for tag, keywords in tags.items()
                if any(
                    contents.find(keyword.encode("utf-8")) != -1
                    for keyword in keywords
                )
            ]


def find_tags_for_all_notebooks(notebooks: list, tags: dict):
    """Map each notebook's short name to the sorted tags found in it.

    The short name is the text after the last ``/`` of the path, cut at its
    first ``.``. Notebooks for which no tag is found are left out of the
    result.
    """
    # Lazily pair every path with its sorted tag list, then keep only the
    # non-empty ones, keyed by the short name.
    tagged = (
        (nb_path, sorted(find_tags_for_notebook(nb_path, tags)))
        for nb_path in notebooks
    )
    return {
        nb_path.rsplit("/", 1)[-1].partition(".")[0]: found
        for nb_path, found in tagged
        if found
    }


if __name__ == "__main__":
    # Usage:
    #   script                          -> scan "notebooks" using ".ci/keywords.json"
    #   script <path> <keywords.json>   -> scan the directory obtained by dropping
    #                                      the last two "/"-separated components
    #                                      of <path>, using <keywords.json>
    if len(sys.argv) == 1:
        notebooks_root = "notebooks"
        keywords_file = ".ci/keywords.json"
    elif len(sys.argv) >= 3:
        # Strip the final two path components of the first argument to get
        # the directory whose subdirectories hold the notebooks.
        notebooks_root = "/".join(sys.argv[1].split("/")[:-2])
        keywords_file = sys.argv[2]
    else:
        # Exactly one argument: the keywords file is missing. Exit with a
        # usage message instead of an IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} [<path> <keywords.json>]")

    notebooks_paths = sorted(get_notebooks(notebooks_root))
    # The keywords document keeps the tag -> keywords mapping under "tags".
    tags = get_tags(keywords_file)["tags"]
    all_notebooks_tags = find_tags_for_all_notebooks(notebooks_paths, tags)
    print(json.dumps(all_notebooks_tags, indent=4))