""" Util functions for openai api """
import json
import os

from thefuzz import process
import openai

from utils import search_document, search_document_annoy

# Location of the raw JSON documents, relative to the current working directory.
_DOCUMENTS_DIR = 'database/original_documents'


def _load_json(relative_path):
    """Parse and return a JSON document stored under ``_DOCUMENTS_DIR``.

    The path is resolved against ``os.getcwd()`` at call time.  The file is
    read as UTF-8 (the JSON interchange encoding) instead of the platform
    default, so non-ASCII names load the same way on every machine.

    Raises:
        FileNotFoundError: if the document does not exist.
    """
    path = os.path.join(os.getcwd(), _DOCUMENTS_DIR, relative_path)
    with open(path, 'r', encoding='utf-8') as fin:
        return json.load(fin)


def _best_match(query, choices):
    """Return the entry of *choices* that fuzzy-matches *query* best, or None.

    Duplicates are dropped while keeping first-seen order, so ties are
    resolved the same way on every run (a ``set`` has no stable order).
    ``None`` is returned when there is nothing to match against.
    """
    candidates = list(dict.fromkeys(choices))
    if not candidates:
        return None
    best = process.extractOne(query, candidates)
    # extractOne yields a (choice, score) tuple, or None when it has no result.
    return best[0] if best else None


def _find_member(name):
    """Return the member record whose name fuzzy-matches *name* best, or None.

    ``members.json`` is read as a mapping of group -> list of member records,
    each record carrying a ``'name'`` key.
    """
    members = [
        member
        for group in _load_json('members.json').values()
        for member in group
    ]
    best_name = _best_match(name, [member['name'] for member in members])
    if best_name is None:
        return None
    # Compare for equality: a substring test could return an earlier member
    # whose longer name merely contains the best match.
    for member in members:
        if member['name'] == best_name:
            return member
    return None


def get_lab_member_info(name: str) -> str:
    """Return the full record of the best-matching lab member as JSON.

    Args:
        name: (possibly misspelled) member name to look up.

    Returns:
        JSON object of the member record, or ``{}`` when no member is found.
    """
    member = _find_member(name)
    return json.dumps(member if member is not None else {})


def get_lab_member_detailed_info(name: str, detailed_info: str) -> str:
    """Return one detail of the best-matching lab member as JSON.

    Args:
        name: (possibly misspelled) member name to look up.
        detailed_info: free text naming the wanted detail.  Text containing
            "link" or "homepage" selects the member's ``links``; text
            containing "photo" or "pic" selects ``photo``; anything else
            selects ``description``.

    Returns:
        JSON of the selected field, or ``{}`` when no member is found.

    Raises:
        KeyError: if the matched record lacks the selected field.
    """
    member = _find_member(name)
    if member is None:
        return json.dumps({})

    wanted = detailed_info.lower()
    if "link" in wanted or "homepage" in wanted:
        return json.dumps(member['links'])
    # "pic" also covers "picture".
    if "photo" in wanted or "pic" in wanted:
        return json.dumps(member['photo'])
    return json.dumps(member["description"])


def get_publication_by_year(year: str) -> str:
    """Return every publication from the given year as a JSON list.

    Args:
        year: the year, as a string (or anything ``int()`` accepts).

    Returns:
        JSON list of publication records whose ``'year'`` equals *year*;
        ``[]`` when nothing matches or *year* is not a valid integer.
    """
    try:
        target_year = int(year)
    except (TypeError, ValueError):
        return json.dumps([])

    # Collect the matches in a list: merging them into a single dict would
    # keep only the last publication of the year.
    matches = [
        pub for pub in _load_json('publications.json')
        if pub['year'] == target_year
    ]
    return json.dumps(matches)


def get_pub_info(name: str) -> str:
    """Return the first publication whose title contains *name* as JSON.

    The comparison is case-insensitive.

    Args:
        name: title, or part of the title, to search for.

    Returns:
        JSON object of the first matching publication, or ``{}`` when nothing
        matches or *name* is blank.
    """
    needle = name.lower()
    # A blank needle is contained in every title and would match arbitrarily.
    if not needle.strip():
        return json.dumps({})

    for pub in _load_json('publications.json'):
        if needle in pub['title'].lower():
            return json.dumps(pub)
    return json.dumps({})


def get_pub_by_name(name: str) -> str:
    """Return every publication of the best-matching author as a JSON list.

    The author is chosen by fuzzy-matching *name* against all author strings
    found in ``publications.json``; a publication is selected when any of its
    author strings contains the chosen author (case-insensitive).

    Args:
        name: (possibly misspelled) author name.

    Returns:
        JSON list of publication records, ``[]`` when there are no authors to
        match against.
    """
    publications = _load_json('publications.json')
    best_author = _best_match(
        name,
        [author for pub in publications for author in pub['authors']],
    )
    if best_author is None:
        return json.dumps([])

    needle = best_author.lower()
    # One entry per publication, even if several of its authors match.
    matches = [
        pub for pub in publications
        if any(needle in author.lower() for author in pub['authors'])
    ]
    return json.dumps(matches)


def semantic_search(query: str):
    """Embed *query* with ``text-embedding-ada-002`` and search the documents.

    Returns whatever ``search_document`` returns for the embedding and the
    argument ``3`` (presumably the number of hits -- confirm in ``utils``).
    """
    response = openai.Embedding.create(model="text-embedding-ada-002", input=query)
    embedding = response['data'][0]['embedding']
    return search_document(embedding, 3)


def search_downloads(input_title: str) -> str:
    """Search ``parsed_downloads.json`` by title and return the entry as JSON.

    The file maps titles (video demos, code and datasets, course materials)
    to entries.  The title that fuzzy-matches *input_title* best is chosen,
    and the entries of all titles containing it (case-insensitive) are merged.

    Args:
        input_title: (possibly misspelled) title to search for.

    Returns:
        JSON object with the merged entry data, or ``{}`` when the file holds
        no titles.
    """
    downloads = _load_json('parsed_downloads.json')
    best_title = _best_match(input_title, downloads)
    if best_title is None:
        return json.dumps({})

    needle = best_title.lower()
    data = {}
    for title, entry in downloads.items():
        if needle in title.lower():
            # NOTE(review): when several titles match, later entries overwrite
            # equal keys of earlier ones; kept as-is to preserve output shape.
            data.update(entry)
    return json.dumps(data)


def get_member_list_by_edu_status(criterion: str) -> str:
    """Return the members of one education-level / lab-status group as JSON.

    Args:
        criterion: name of the group file (without ``.json``) under
            ``members/``.  Documented choices are 'undergrad', 'current_phd',
            'director', 'former_postdoc', 'former_visiting', 'graduated' and
            'master'.

    Returns:
        JSON dump of the content of ``members/<criterion>.json``.

    Raises:
        ValueError: if *criterion* contains path components.
        FileNotFoundError: if no file exists for *criterion*.
    """
    # The value ends up in a file path; refuse anything that could leave the
    # members directory (e.g. "../../secrets").
    if os.path.basename(criterion) != criterion:
        raise ValueError(f'invalid member criterion: {criterion!r}')
    return json.dumps(_load_json(f'members/{criterion}.json'))