Ask-ANRG / openai_function_utils /openai_function_impl.py
FloraJ's picture
update good bad example
2bacb33
raw
history blame
4.84 kB
"""
Utility functions for the OpenAI API.
"""
import json
import os
from thefuzz import process
import openai
from utils import search_document, search_document_annoy
def get_lab_member_info(name: str) -> str:
    """Return the record of the lab member whose name best matches *name*.

    The member database (``database/original_documents/members.json`` under
    the current working directory) maps group keys to lists of member
    records, each carrying a ``'name'`` entry. The query is fuzzy-matched
    against all member names with ``thefuzz.process.extractOne``.

    Args:
        name: Free-form (possibly misspelled or partial) member name.

    Returns:
        A JSON string with the matched member record, or ``"{}"`` when the
        database contains no members to match against.
    """
    database_addr = os.path.join(os.getcwd(), 'database/original_documents/members.json')
    with open(database_addr, 'r', encoding='utf-8') as fin:
        all_members_info = json.load(fin)

    # Index members by exact name, keeping the first record for a duplicated
    # name. Insertion order also makes the candidate list deterministic.
    by_name = {}
    for group in all_members_info.values():
        for member in group:
            by_name.setdefault(member['name'], member)

    best = process.extractOne(name, list(by_name))
    if best is None:
        # extractOne yields None when there is nothing to choose from.
        return json.dumps({})

    # Look the winner up by its exact name: a substring test could return a
    # different member whose name merely contains the best match.
    return json.dumps(by_name[best[0]])
def get_lab_member_detailed_info(name: str, detailed_info: str) -> str:
    """Return one specific detail of the lab member best matching *name*.

    The member is found by fuzzy-matching *name* against every ``'name'`` in
    ``database/original_documents/members.json`` (relative to the current
    working directory). The detail is chosen from keywords in *detailed_info*:

    * contains ``"link"`` or ``"homepage"`` -> the member's ``'links'``
    * contains ``"photo"`` or ``"pic"`` (covers "picture") -> ``'photo'``
    * anything else -> ``'description'``

    Args:
        name: Free-form (possibly misspelled or partial) member name.
        detailed_info: Text describing which detail is wanted.

    Returns:
        A JSON string with the requested detail, or ``"{}"`` when no member
        can be matched or the member record lacks the requested field.
    """
    database_addr = os.path.join(os.getcwd(), 'database/original_documents/members.json')
    with open(database_addr, 'r', encoding='utf-8') as fin:
        all_members_info = json.load(fin)

    # Index members by exact name (first record wins for duplicated names).
    by_name = {}
    for group in all_members_info.values():
        for member in group:
            by_name.setdefault(member['name'], member)

    best = process.extractOne(name, list(by_name))
    if best is None:
        # extractOne yields None when there is nothing to choose from.
        return json.dumps({})
    member = by_name[best[0]]

    wanted = detailed_info.lower()
    if "link" in wanted or "homepage" in wanted:
        field = 'links'
    elif "photo" in wanted or "pic" in wanted:
        field = 'photo'
    else:
        field = 'description'

    # A record without the requested field answers with an empty object
    # instead of raising KeyError.
    return json.dumps(member.get(field, {}))
def get_publication_by_year(year: str) -> str:
    """Return every publication whose ``'year'`` equals *year*.

    Publications are read from
    ``database/original_documents/publications.json`` (relative to the current
    working directory), which holds a list of publication records.

    Args:
        year: The publication year; converted with ``int()`` before comparing.

    Returns:
        A JSON array (as a string) of all matching publication records, in
        file order; ``"[]"`` when none match.

    Raises:
        ValueError: If *year* cannot be converted to an integer.
    """
    database_addr = os.path.join(os.getcwd(), 'database/original_documents/publications.json')
    with open(database_addr, 'r', encoding='utf-8') as fin:
        all_pub_info = json.load(fin)

    target_year = int(year)
    # Collect matches in a list: merging them with dict.update() would let
    # each publication overwrite the previous one, leaving only the last.
    matches = [pub for pub in all_pub_info if pub.get('year') == target_year]
    return json.dumps(matches)
def get_pub_info(name: str) -> str:
    """Return the first publication whose title contains *name*.

    The comparison is a case-insensitive substring test against the
    ``'title'`` of each record in
    ``database/original_documents/publications.json`` (relative to the current
    working directory).

    Args:
        name: Title, or fragment of a title, to look for. Surrounding
            whitespace is ignored.

    Returns:
        A JSON string with the first matching publication record, or ``"{}"``
        when the query is blank or nothing matches.
    """
    query = name.strip().lower()
    if not query:
        # An empty string is a substring of every title; without this guard
        # a blank query would return an arbitrary first publication.
        return json.dumps({})

    database_addr = os.path.join(os.getcwd(), 'database/original_documents/publications.json')
    with open(database_addr, 'r', encoding='utf-8') as fin:
        all_pub_info = json.load(fin)

    for pub in all_pub_info:
        if query in pub.get('title', '').lower():
            return json.dumps(pub)
    return json.dumps({})
def get_pub_by_name(name: str) -> str:
    """Return every publication written by the author best matching *name*.

    Author names are gathered from the ``'authors'`` list of each record in
    ``database/original_documents/publications.json`` (relative to the current
    working directory). *name* is fuzzy-matched against them with
    ``thefuzz.process.extractOne``; a publication is selected when any of its
    authors contains the matched name (case-insensitive).

    Args:
        name: Free-form (possibly misspelled or partial) author name.

    Returns:
        A JSON array (as a string) of all matching publication records, in
        file order; ``"[]"`` when the database lists no authors.
    """
    database_addr = os.path.join(os.getcwd(), 'database/original_documents/publications.json')
    with open(database_addr, 'r', encoding='utf-8') as fin:
        all_pub_info = json.load(fin)

    # De-duplicate authors while keeping file order so the candidate list
    # handed to the fuzzy matcher is deterministic.
    authors = list(dict.fromkeys(
        author for pub in all_pub_info for author in pub.get('authors', [])
    ))
    best = process.extractOne(name, authors)
    if best is None:
        # extractOne yields None when there is nothing to choose from.
        return json.dumps([])

    target = best[0].lower()
    # Collect matches in a list: merging them with dict.update() would keep
    # only the last publication of the author.
    matches = [
        pub for pub in all_pub_info
        if any(target in author.lower() for author in pub.get('authors', []))
    ]
    return json.dumps(matches)
def semantic_search(query: str):
    """Embed *query* and look it up with ``search_document``.

    The query text is sent to ``openai.Embedding.create`` using the
    ``text-embedding-ada-002`` model; the embedding of the first returned
    item is passed to ``search_document`` together with the constant ``3``
    (presumably the number of hits requested — confirm against ``utils``).

    Returns:
        Whatever ``search_document`` returns, unchanged.
    """
    embedding_reply = openai.Embedding.create(input=query, model="text-embedding-ada-002")
    query_vector = embedding_reply['data'][0]['embedding']
    return search_document(query_vector, 3)
def search_downloads(input_title: str) -> str:
    """Return the download entry whose title best matches *input_title*.

    Searches ``database/original_documents/parsed_downloads.json`` (relative
    to the current working directory), a mapping from title to entry covering
    video demos, code and datasets, and course materials. The query is
    fuzzy-matched against the titles with ``thefuzz.process.extractOne``.

    Args:
        input_title: Free-form (possibly misspelled or partial) title.

    Returns:
        A JSON string with the entry stored under the best-matching title, or
        ``"{}"`` when the catalogue is empty.
    """
    download_fn = os.path.join(os.getcwd(), 'database/original_documents/parsed_downloads.json')
    with open(download_fn, 'r', encoding='utf-8') as fin:
        all_download_info = json.load(fin)

    best = process.extractOne(input_title, list(all_download_info))
    if best is None:
        # extractOne yields None when there is nothing to choose from.
        return json.dumps({})

    # Fetch exactly the winning title. Re-scanning with a substring test
    # would merge unrelated entries whose titles contain the best title.
    return json.dumps(all_download_info[best[0]])
def get_member_list_by_edu_status(criterion: str) -> str:
    """Return the member list for one education level / lab status.

    Reads ``database/original_documents/members/<criterion>.json`` relative
    to the current working directory. Documented choices for *criterion* are
    ``'undergrad'``, ``'current_phd'``, ``'director'``, ``'former_postdoc'``,
    ``'former_visiting'``, ``'graduated'`` and ``'master'``.

    Args:
        criterion: Name of the member category (the JSON file's base name).

    Returns:
        A JSON string with the contents of the category file.

    Raises:
        FileNotFoundError: If *criterion* is empty, contains a path separator,
            or no file exists for it.
    """
    # The criterion becomes part of a file path; refuse anything that could
    # step outside the members directory (e.g. "../secret").
    if not criterion or '/' in criterion or '\\' in criterion:
        raise FileNotFoundError(f'No member list for criterion: {criterion!r}')

    member_json = os.path.join(os.getcwd(), f'database/original_documents/members/{criterion}.json')
    with open(member_json, 'r', encoding='utf-8') as fin:
        member_info_list = json.load(fin)
    return json.dumps(member_info_list)