File size: 4,842 Bytes
de3c2ee
 
 
 
 
 
24b5b67
2bacb33
de3c2ee
 
 
 
 
 
 
24b5b67
de3c2ee
 
 
24b5b67
 
 
 
 
 
 
de3c2ee
 
 
 
 
 
 
 
 
 
24b5b67
 
 
 
 
 
 
de3c2ee
 
 
24b5b67
de3c2ee
 
 
 
 
 
 
 
 
24b5b67
de3c2ee
 
 
 
 
ba91b26
 
 
 
 
 
 
 
de3c2ee
 
 
 
 
 
 
 
 
 
 
 
 
24b5b67
de3c2ee
24b5b67
 
 
 
 
 
 
 
 
de3c2ee
 
 
 
 
 
 
24b5b67
de3c2ee
 
 
 
 
24b5b67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
de3c2ee
24b5b67
de3c2ee
 
24b5b67
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
"""
Utility functions for the OpenAI API.
"""
import json
import os
from thefuzz import process
import openai
from utils import search_document, search_document_annoy


def get_lab_member_info(name: str) -> str:
    """Return the stored record of the lab member whose name best matches *name*.

    Member records are read from ``database/original_documents/members.json``
    (resolved against the current working directory).  The file is a JSON
    object mapping each group to a list of member objects that carry a
    ``name`` key.  The query is fuzzy-matched against every stored name with
    ``thefuzz.process.extractOne``.

    Args:
        name: Free-form member name to look up.

    Returns:
        JSON text of the best-matching member object, or ``"{}"`` when the
        database holds no members.
    """
    database_addr = os.path.join(os.getcwd(), 'database/original_documents/members.json')
    with open(database_addr, 'r', encoding='utf-8') as fin:
        all_members_info = json.load(fin)

    # Flatten the groups once, keeping file order.
    members = [member for group in all_members_info.values() for member in group]
    if not members:
        return json.dumps({})

    # An ordered, de-duplicated list (instead of a set) keeps the candidate
    # order, and therefore tie-breaking, identical from run to run.
    choices = list(dict.fromkeys(member['name'] for member in members))
    best = process.extractOne(name, choices)
    if best is None:
        # extractOne() yields None when no candidate is selected.
        return json.dumps({})

    best_name = best[0]
    for member in members:
        # Exact comparison: a substring test could hand back a different
        # member whose name merely contains the best match.
        if member['name'] == best_name:
            return json.dumps(member)

    return json.dumps({})


def get_lab_member_detailed_info(name: str, detailed_info: str) -> str:
    """Return one field of the lab member whose name best matches *name*.

    Member records are read from ``database/original_documents/members.json``
    (resolved against the current working directory) and the query is
    fuzzy-matched against every stored name with
    ``thefuzz.process.extractOne``.

    The field is selected by keywords found in *detailed_info*
    (case-insensitive):

    * ``"link"`` or ``"homepage"``  -> the member's ``links``
    * ``"photo"`` or ``"pic"``      -> the member's ``photo``
    * anything else                 -> the member's ``description``

    Args:
        name: Free-form member name to look up.
        detailed_info: Text describing which detail is wanted.

    Returns:
        JSON text of the selected field, or ``"{}"`` when the database holds
        no members.

    Raises:
        KeyError: If the matched record lacks the selected field.
    """
    database_addr = os.path.join(os.getcwd(), 'database/original_documents/members.json')
    with open(database_addr, 'r', encoding='utf-8') as fin:
        all_members_info = json.load(fin)

    # Flatten the groups once, keeping file order.
    members = [member for group in all_members_info.values() for member in group]
    if not members:
        return json.dumps({})

    # An ordered, de-duplicated list (instead of a set) keeps the candidate
    # order, and therefore tie-breaking, identical from run to run.
    choices = list(dict.fromkeys(member['name'] for member in members))
    best = process.extractOne(name, choices)
    if best is None:
        # extractOne() yields None when no candidate is selected.
        return json.dumps({})

    wanted = detailed_info.lower()
    if "link" in wanted or "homepage" in wanted:
        field = 'links'
    elif "photo" in wanted or "pic" in wanted:  # "pic" also covers "picture"
        field = 'photo'
    else:
        field = 'description'

    best_name = best[0]
    for member in members:
        # Exact comparison: a substring test could hand back a different
        # member whose name merely contains the best match.
        if member['name'] == best_name:
            return json.dumps(member[field])

    return json.dumps({})


def get_publication_by_year(year: str) -> str:
    """Return every publication whose ``year`` equals *year*.

    Publications are read from
    ``database/original_documents/publications.json`` (resolved against the
    current working directory), a JSON array of publication objects.  Each
    entry's ``year`` value is compared against ``int(year)``.

    Args:
        year: Publication year; anything ``int()`` accepts, e.g. ``"2021"``.

    Returns:
        JSON array text with all matching publications in file order;
        ``"[]"`` when nothing matches.

    Raises:
        ValueError: If *year* cannot be converted to an integer.
    """
    database_addr = os.path.join(os.getcwd(), 'database/original_documents/publications.json')
    with open(database_addr, 'r', encoding='utf-8') as fin:
        all_pub_info = json.load(fin)

    wanted_year = int(year)  # convert once instead of once per publication
    # Collect the matches in a list: merging them into a single dict would
    # let each publication overwrite the previous one's fields.
    matches = [pub for pub in all_pub_info if pub['year'] == wanted_year]
    return json.dumps(matches)


def get_pub_info(name: str):
    """Look up a publication by (part of) its title.

    Loads ``database/original_documents/publications.json`` from the current
    working directory and returns, as JSON text, the first publication whose
    ``title`` contains *name* (case-insensitive).  Returns ``"{}"`` when no
    title matches.
    """
    pub_path = os.path.join(os.getcwd(), 'database/original_documents/publications.json')
    with open(pub_path, 'r') as handle:
        publications = json.load(handle)

    # First hit in file order wins; an empty object signals "not found".
    first_hit = next(
        (pub for pub in publications if name.lower() in pub['title'].lower()),
        {},
    )
    return json.dumps(first_hit)


def get_pub_by_name(name: str) -> str:
    """Return every publication by the author whose name best matches *name*.

    Publications are read from
    ``database/original_documents/publications.json`` (resolved against the
    current working directory), a JSON array of objects carrying an
    ``authors`` list.  The query is fuzzy-matched against all author names
    with ``thefuzz.process.extractOne``; a publication is selected when any
    of its authors contains the best-matching name (case-insensitive).

    Args:
        name: Free-form author name to look up.

    Returns:
        JSON array text with all selected publications in file order;
        ``"[]"`` when the database lists no authors.
    """
    database_addr = os.path.join(os.getcwd(), 'database/original_documents/publications.json')
    # One read is enough: the same data serves the author scan and the
    # publication filter.
    with open(database_addr, 'r', encoding='utf-8') as fin:
        publications = json.load(fin)

    # An ordered, de-duplicated list (instead of a set) keeps the candidate
    # order, and therefore tie-breaking, identical from run to run.
    authors = list(dict.fromkeys(
        author for pub in publications for author in pub['authors']
    ))
    # extractOne() yields None when there is nothing to choose from.
    best = process.extractOne(name, authors) if authors else None
    if best is None:
        return json.dumps([])

    best_author = best[0].lower()
    # Collect the matches in a list: merging them into a single dict would
    # let each publication overwrite the previous one's fields.
    matches = [
        pub for pub in publications
        if any(best_author in author.lower() for author in pub['authors'])
    ]
    return json.dumps(matches)


def semantic_search(query: str):
    """Embed *query* and hand the vector to ``search_document``.

    The embedding is requested from OpenAI's ``text-embedding-ada-002`` model;
    the first returned embedding is passed to ``search_document`` together
    with the constant ``3`` (presumably the number of hits to return; confirm
    against ``utils.search_document``).  Whatever ``search_document`` returns
    is passed through unchanged.
    """
    embedding_reply = openai.Embedding.create(input=query, model="text-embedding-ada-002")
    query_vector = embedding_reply['data'][0]['embedding']
    return search_document(query_vector, 3)


def search_downloads(input_title: str) -> str:
    """Return the download entry whose title best matches *input_title*.

    Entries are read from
    ``database/original_documents/parsed_downloads.json`` (resolved against
    the current working directory), a JSON object keyed by title.  Per the
    original note, the titles cover video demos, code and datasets, and
    course materials.  The query is fuzzy-matched against the titles with
    ``thefuzz.process.extractOne``.

    Args:
        input_title: Free-form title to search for.

    Returns:
        JSON text of the entry stored under the best-matching title, or
        ``"{}"`` when the file holds no entries.
    """
    download_fn = os.path.join(os.getcwd(), 'database/original_documents/parsed_downloads.json')
    with open(download_fn, 'r', encoding='utf-8') as fin:
        all_download_info = json.load(fin)

    titles = list(all_download_info)  # file order keeps tie-breaking stable
    # extractOne() yields None when there is nothing to choose from.
    best = process.extractOne(input_title, titles) if titles else None
    if best is None:
        return json.dumps({})

    # The best match is itself a key, so look it up directly; a substring
    # scan would also pick up longer titles and blend their entries together.
    return json.dumps(all_download_info[best[0]])


def get_member_list_by_edu_status(criterion: str) -> str:
    """Return the member list stored for one education level / lab status.

    Each criterion maps to the file
    ``database/original_documents/members/<criterion>.json`` under the
    current working directory.  Documented criteria are ``undergrad``,
    ``current_phd``, ``director``, ``former_postdoc``, ``former_visiting``,
    ``graduated`` and ``master``.

    Args:
        criterion: Name of the member category to load.

    Returns:
        JSON text of the parsed content of the category file.

    Raises:
        FileNotFoundError: If *criterion* is empty, contains a path
            separator, or has no matching file.
    """
    # The criterion becomes part of a file name, so refuse anything that
    # could step outside the members directory (e.g. "../publications").
    if not criterion or '/' in criterion or '\\' in criterion:
        raise FileNotFoundError(f'No member list for criterion {criterion!r}')

    member_json = os.path.join(os.getcwd(), f'database/original_documents/members/{criterion}.json')
    with open(member_json, 'r', encoding='utf-8') as fin:
        member_info_list = json.load(fin)

    return json.dumps(member_info_list)