File size: 4,842 Bytes
de3c2ee 24b5b67 2bacb33 de3c2ee 24b5b67 de3c2ee 24b5b67 de3c2ee 24b5b67 de3c2ee 24b5b67 de3c2ee 24b5b67 de3c2ee ba91b26 de3c2ee 24b5b67 de3c2ee 24b5b67 de3c2ee 24b5b67 de3c2ee 24b5b67 de3c2ee 24b5b67 de3c2ee 24b5b67 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
"""
Utility functions for the OpenAI API.
"""
import json
import os
from thefuzz import process
import openai
from utils import search_document, search_document_annoy
def get_lab_member_info(name: str) -> str:
    """Return the record of the lab member whose name best matches *name*.

    The member database is read from
    ``database/original_documents/members.json`` under the current working
    directory. The file is iterated as a mapping from group label to a list of
    member records, each carrying a ``'name'`` key. The query is fuzzy-matched
    against every member name with ``thefuzz`` and the record that owns the
    best-scoring name is returned.

    Args:
        name: Member name to look up; it does not have to be an exact match.

    Returns:
        The matched member record serialized as a JSON object, or ``"{}"``
        when the database contains no members or nothing matches.
    """
    database_addr = os.path.join(os.getcwd(), 'database/original_documents/members.json')
    with open(database_addr, 'r', encoding='utf-8') as fin:
        all_members_info = json.load(fin)

    # Index every record by name in a single pass. setdefault keeps the first
    # record when the same name appears in several groups.
    members_by_name = {}
    for group in all_members_info.values():
        for member in group:
            members_by_name.setdefault(member['name'], member)

    # extractOne yields None for an empty choice list, so answer "not found"
    # here instead of failing on best[0] below.
    if not members_by_name:
        return json.dumps({})

    # A list (not the dict itself, which thefuzz would treat as a mapping of
    # key -> choice) keeps tie-breaking in file order.
    best = process.extractOne(name, list(members_by_name))
    if best is None:
        return json.dumps({})

    # best[0] is one of the indexed names, so look the record up directly
    # rather than re-scanning with a substring test, which could return a
    # different member whose name merely contains the matched one.
    return json.dumps(members_by_name[best[0]])
def get_lab_member_detailed_info(name: str, detailed_info: str) -> str:
    """Return one field of the lab member whose name best matches *name*.

    The member database is read from
    ``database/original_documents/members.json`` under the current working
    directory. The file is iterated as a mapping from group label to a list of
    member records, each carrying a ``'name'`` key. The member is selected by
    fuzzy-matching *name* against all member names with ``thefuzz``; the
    returned field is chosen from keywords found in *detailed_info*.

    Args:
        name: Member name to look up; it does not have to be an exact match.
        detailed_info: Free-text description of the wanted detail. If it
            contains "link" or "homepage" the member's ``'links'`` are
            returned; if it contains "photo" or "pic" the member's ``'photo'``
            is returned; otherwise the member's ``'description'``.

    Returns:
        The selected field serialized as JSON, or ``"{}"`` when the database
        contains no members or nothing matches.

    Raises:
        KeyError: If the matched record lacks the selected field.
    """
    database_addr = os.path.join(os.getcwd(), 'database/original_documents/members.json')
    with open(database_addr, 'r', encoding='utf-8') as fin:
        all_members_info = json.load(fin)

    # Index every record by name in a single pass. setdefault keeps the first
    # record when the same name appears in several groups.
    members_by_name = {}
    for group in all_members_info.values():
        for member in group:
            members_by_name.setdefault(member['name'], member)

    # extractOne yields None for an empty choice list, so answer "not found"
    # here instead of failing on best[0] below.
    if not members_by_name:
        return json.dumps({})

    # A list (not the dict itself, which thefuzz would treat as a mapping of
    # key -> choice) keeps tie-breaking in file order.
    best = process.extractOne(name, list(members_by_name))
    if best is None:
        return json.dumps({})

    # best[0] is one of the indexed names, so a direct lookup returns exactly
    # the best-matched member.
    member = members_by_name[best[0]]

    wanted = detailed_info.lower()
    if "link" in wanted or "homepage" in wanted:
        return json.dumps(member['links'])
    # "pic" is a substring of "picture", so one test covers both spellings.
    if "photo" in wanted or "pic" in wanted:
        return json.dumps(member['photo'])
    return json.dumps(member["description"])
def get_publication_by_year(year: str) -> str:
    """Return every publication whose ``'year'`` equals *year*.

    Publications are read from
    ``database/original_documents/publications.json`` under the current
    working directory. The file is iterated as a list of publication records,
    each carrying a ``'year'`` key that is compared against ``int(year)``.

    Args:
        year: Publication year as a string of digits, e.g. ``"2021"``.

    Returns:
        A JSON array holding all matching publication records in file order;
        ``"[]"`` when none match. Matches are collected in a list because
        merging them into one dict would keep only the last publication.

    Raises:
        ValueError: If *year* cannot be converted to an integer.
    """
    # Convert once up front instead of on every loop iteration.
    target_year = int(year)

    database_addr = os.path.join(os.getcwd(), 'database/original_documents/publications.json')
    with open(database_addr, 'r', encoding='utf-8') as fin:
        all_pub_info = json.load(fin)

    matches = [pub for pub in all_pub_info if pub['year'] == target_year]
    return json.dumps(matches)
def get_pub_info(name: str):
    """Look up a publication by a case-insensitive fragment of its title.

    Reads ``database/original_documents/publications.json`` under the current
    working directory, iterates it as a list of records with a ``'title'``
    key, and returns the first record whose title contains *name*.

    Args:
        name: Text to search for inside publication titles.

    Returns:
        The first matching record as a JSON string, or ``"{}"`` if no title
        contains *name*.
    """
    pub_path = os.path.join(os.getcwd(), 'database/original_documents/publications.json')
    with open(pub_path) as handle:
        publications = json.load(handle)

    # next() stops at the first hit and falls back to an empty record.
    first_hit = next(
        (pub for pub in publications if name.lower() in pub['title'].lower()),
        {},
    )
    return json.dumps(first_hit)
def get_pub_by_name(name: str) -> str:
    """Return every publication written by the author best matching *name*.

    Publications are read once from
    ``database/original_documents/publications.json`` under the current
    working directory. The file is iterated as a list of records, each with an
    ``'authors'`` list of strings. *name* is fuzzy-matched against all author
    strings with ``thefuzz``; a publication is selected when any of its
    authors contains the best-matching author string (case-insensitive).

    Args:
        name: Author name to look up; it does not have to be an exact match.

    Returns:
        A JSON array of the matching publication records in file order, or
        ``"[]"`` when the database lists no authors or nothing matches.
        Matches are collected in a list because merging them into one dict
        would keep only the last publication.
    """
    database_addr = os.path.join(os.getcwd(), 'database/original_documents/publications.json')
    with open(database_addr, 'r', encoding='utf-8') as fin:
        all_pub_info = json.load(fin)

    # De-duplicate author strings while keeping file order, so tie-breaking in
    # the fuzzy match does not depend on set ordering.
    authors = list(dict.fromkeys(
        author for pub in all_pub_info for author in pub['authors']
    ))

    # extractOne yields None for an empty choice list, so answer "not found"
    # here instead of failing on best[0] below.
    if not authors:
        return json.dumps([])

    best = process.extractOne(name, authors)
    if best is None:
        return json.dumps([])

    best_author = best[0].lower()
    # any() adds each publication at most once, even if several of its author
    # strings contain the matched name.
    matches = [
        pub for pub in all_pub_info
        if any(best_author in author.lower() for author in pub['authors'])
    ]
    return json.dumps(matches)
def semantic_search(query: str):
    """Embed *query* with OpenAI and hand the vector to ``search_document``.

    The embedding is requested from the ``text-embedding-ada-002`` model and
    the first returned vector is passed on together with the literal ``3``
    (presumably the number of hits to retrieve; confirm against
    ``utils.search_document``).

    Args:
        query: Text to embed and search for.

    Returns:
        Whatever ``search_document`` returns for the embedding.
    """
    embedding_reply = openai.Embedding.create(
        input=query,
        model="text-embedding-ada-002",
    )
    query_vector = embedding_reply['data'][0]['embedding']
    return search_document(query_vector, 3)
def search_downloads(input_title: str) -> str:
    """Search the downloads database by title.

    Searches ``database/original_documents/parsed_downloads.json`` (under the
    current working directory) based on the title of video demos, code and
    datasets, and course materials. The file is iterated as a mapping from
    title to entry. *input_title* is fuzzy-matched against all titles with
    ``thefuzz``; the entries of every title containing the best match
    (case-insensitive) are merged into one dict.

    Args:
        input_title: Title to look up; it does not have to be an exact match.

    Returns:
        The merged entry data as a JSON object, or ``"{}"`` when the file
        holds no titles or nothing matches.
    """
    download_fn = os.path.join(os.getcwd(), 'database/original_documents/parsed_downloads.json')
    with open(download_fn, 'r', encoding='utf-8') as fin:
        all_download_info = json.load(fin)

    # extractOne yields None for an empty choice list, so answer "not found"
    # here instead of failing on best[0] below.
    if not all_download_info:
        return json.dumps({})

    # Pass a list of titles: handing thefuzz the dict itself would make it
    # match against the entries instead of the titles.
    best = process.extractOne(input_title, list(all_download_info))
    if best is None:
        return json.dumps({})

    best_title = best[0].lower()
    data = {}
    for title, entry in all_download_info.items():
        if best_title in title.lower():
            # NOTE(review): entries of several matching titles are merged and
            # later ones overwrite shared keys; confirm this is intended.
            data.update(entry)
    return json.dumps(data)
def get_member_list_by_edu_status(criterion: str) -> str:
    """Get the list of member info by education level and lab status.

    Loads ``database/original_documents/members/{criterion}.json`` under the
    current working directory and returns its content re-serialized as JSON.
    Documented choices for *criterion* are ``'undergrad'``, ``'current_phd'``,
    ``'director'``, ``'former_postdoc'``, ``'former_visiting'``,
    ``'graduated'`` and ``'master'``.

    Args:
        criterion: Name (without extension) of the member list to load.

    Returns:
        The content of the selected member file as a JSON string.

    Raises:
        FileNotFoundError: If no file exists for *criterion*, or if
            *criterion* is not a plain file name (empty, ``.``/``..``, or
            containing a path separator).
    """
    # criterion is interpolated into a path, so refuse anything that could
    # step outside the members directory (e.g. "../publications"). The same
    # exception type is raised as for an unknown criterion.
    if criterion in ('', '.', '..') or os.path.basename(criterion) != criterion:
        raise FileNotFoundError(f'No member list for criterion: {criterion!r}')

    member_json = os.path.join(os.getcwd(), f'database/original_documents/members/{criterion}.json')
    with open(member_json, 'r', encoding='utf-8') as fin:
        member_info_list = json.load(fin)
    return json.dumps(member_info_list)
|