|
import os |
|
import openai |
|
import tiktoken |
|
import numpy as np |
|
import ast |
|
import pandas as pd |
|
import matplotlib.pyplot as plt |
|
import gradio as gr |
|
from dotenv import load_dotenv |
|
|
|
|
|
# Load variables from a .env file into the process environment before
# anything below reads them.
load_dotenv()

# OpenAI credential; os.getenv yields None when the variable is not set.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# The `openai` module object itself serves as the client in this file, so the
# key is installed directly on the module.
client = openai
client.api_key = OPENAI_API_KEY

# Tokenizer used by get_embedding to count and truncate input tokens.
# NOTE(review): tiktoken maps the text-embedding-3 models to `cl100k_base`,
# so counts from `p50k_base` may differ from what the API sees - confirm the
# mismatch is intentional.
tokenizer = tiktoken.get_encoding('p50k_base')
|
|
|
def get_embedding(text, model='text-embedding-3-small', max_tokens=7000):
    """Return the embedding vector for ``text`` from the embeddings endpoint.

    The text is tokenized with the module-level ``tokenizer``. When it is
    longer than ``max_tokens`` tokens, only the first ``max_tokens`` tokens
    are kept and decoded back into text before the request is sent.

    Args:
        text: The string to embed.
        model: Name of the embedding model passed to the API.
        max_tokens: Maximum number of tokens of ``text`` to send.

    Returns:
        The ``embedding`` attribute of the first item in the API response.
    """
    token_ids = tokenizer.encode(text)
    if len(token_ids) > max_tokens:
        # Truncate on token boundaries, then turn the kept tokens back into text.
        text = tokenizer.decode(token_ids[:max_tokens])

    response = client.embeddings.create(input=[text], model=model)
    return response.data[0].embedding
|
|
|
# Table of pre-computed page embeddings; the rest of this file reads its
# 'embedding', 'text' and 'url' columns.
data = pd.read_csv(filepath_or_buffer="ucdavis_health_embeddings.csv")
|
|
|
|
|
def safe_literal_eval(x):
    """Parse ``x`` as a Python literal, returning ``[]`` when it is malformed.

    Args:
        x: Usually the string form of a literal (e.g. ``"[0.1, 0.2]"``).
            Non-string values such as NaN are treated as malformed.

    Returns:
        The value produced by ``ast.literal_eval(x)``, or an empty list when
        ``x`` cannot be evaluated as a literal.
    """
    try:
        return ast.literal_eval(x)
    except (ValueError, TypeError, SyntaxError, RecursionError):
        # Besides ValueError/SyntaxError, literal_eval raises TypeError for
        # input such as "{[1]: 2}" (unhashable key) and RecursionError for
        # pathologically nested input; all of them mean "not a usable cell".
        return []
|
|
|
# Decode each stored embedding from its string form, coerce its entries to
# float, and replace anything that did not decode to a list with [].
data['embedding'] = (
    data['embedding']
    .apply(safe_literal_eval)
    .apply(lambda cell: list(map(float, cell)) if isinstance(cell, list) else [])
)

# Keep only the rows that still carry a non-empty vector.
data = data[data['embedding'].apply(lambda vector: len(vector) > 0)]
|
|
|
|
|
def query(question, top_k=4):
    """Answer ``question`` using the most similar stored pages as context.

    The question is embedded with ``get_embedding`` and scored against every
    row of the module-level ``data`` frame by dot product. The text of the
    ``top_k`` highest-scoring rows is handed to the chat model as context.

    Args:
        question: The user's question.
        top_k: Number of highest-scoring pages to use as context and to
            report back. Defaults to 4.

    Returns:
        A 4-tuple ``(answer, links, similarity_scores, link_list)``:
        the chat model's reply text, the selected URLs joined into one
        string, the selected scores as a list (highest first), and the
        selected URLs as a list in the same order as the scores.

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
    """
    if top_k < 1:
        raise ValueError("top_k must be a positive integer")

    question_embedding = get_embedding(question)

    # Dot product of each stored page embedding with the question embedding;
    # a larger value is treated as a closer match.
    similarity_series = data['embedding'].apply(
        lambda page_embedding: np.dot(page_embedding, question_embedding)
    )

    # Sort once and reuse the result. `.iloc` makes the "first top_k entries"
    # slice explicitly positional, independent of the index labels.
    top_matches = similarity_series.sort_values(ascending=False).iloc[:top_k]
    top_rows = data.loc[top_matches.index]

    context = " ".join(top_rows['text'])
    link_list = top_rows['url'].tolist()
    links = "\n \n".join(link_list)

    context_prompt = (
        "Use this information from the UC Davis Health website and answer "
        f"the user's question: {context}. Please stick to this context while "
        "answering the question. Include all important information relevant "
        "to what the user is seeking, also tell them things they should be "
        "mindful of while following instructions. Don't miss any details "
        "about timings or weekdays."
    )

    # NOTE(review): the retrieval instructions are sent under the "assistant"
    # role, after the user's message. Instructions normally belong in a
    # system message - confirm this ordering is intentional before changing.
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": "You are a helpful assistant tasked to respond to users of UC Davis Health who are seeking information about their services"},
            {"role": "user", "content": question},
            {"role": "assistant", "content": context_prompt},
        ],
        model="gpt-3.5-turbo",
    )

    answer = chat_completion.choices[0].message.content
    return answer, links, top_matches.tolist(), link_list
|
|
|
def plot_bar_chart(similarity_scores, links_series, output_path='bar_chart.png'):
    """Draw the similarity scores as a horizontal bar chart and save it.

    Each bar is labelled "Link N", where N is the 1-based position of the
    score in ``similarity_scores`` (and therefore of its URL in
    ``links_series``). Bars are arranged with the highest score at the top.

    Args:
        similarity_scores: Scores to plot, one per link.
        links_series: The links the scores belong to. Only used to pair
            scores with links; if the lengths differ, the extra entries of
            the longer one are ignored.
        output_path: Where the PNG is written. Defaults to 'bar_chart.png'.

    Returns:
        ``output_path``, the path of the saved image.
    """
    # Pair scores with links so that the number of bars matches the number
    # of (score, link) pairs.
    scores = [score for score, _ in zip(similarity_scores, links_series)]

    # Rank input positions by descending score. sorted() is stable even with
    # reverse=True, so tied scores keep their input order and each label
    # stays attached to the link at that position.
    ranked = sorted(range(len(scores)), key=scores.__getitem__, reverse=True)

    # barh draws its first entry at the bottom, so flip the ranking to put
    # the best match on top.
    bottom_to_top = ranked[::-1]
    link_labels = [f"Link {position + 1}" for position in bottom_to_top]
    ordered_scores = [scores[position] for position in bottom_to_top]

    # Use an explicit figure/axes pair instead of pyplot's implicit "current
    # figure", and always close it: pyplot keeps every figure alive until it
    # is closed, which would leak one figure per call.
    fig, ax = plt.subplots(figsize=(12, 8))
    try:
        bars = ax.barh(link_labels, ordered_scores, color='skyblue', edgecolor='black')
        ax.set_xlabel('Similarity Score')
        ax.set_ylabel('Links')
        ax.set_title('Similarity Scores Bar Chart for the above links in the same order')
        ax.set_xlim(0, 1)
        ax.grid(True, axis='x')

        # Print each score just past the end of its bar.
        for bar, score in zip(bars, ordered_scores):
            ax.text(bar.get_width() + 0.01, bar.get_y() + bar.get_height() / 2,
                    f'{score:.2f}', va='center', ha='left')

        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        plt.close(fig)

    return output_path
|
|
|
|
|
def gradio_query(question):
    """Run ``query`` for ``question`` and chart the resulting scores.

    Args:
        question: The user's question.

    Returns:
        A 3-tuple of the answer text, the joined links string, and the path
        of the bar chart image returned by ``plot_bar_chart``.
    """
    reply, link_text, scores, urls = query(question)
    chart_path = plot_bar_chart(scores, urls)
    return reply, link_text, chart_path
|
|
|
# Gradio UI: one question box in; answer text, link list and the similarity
# bar chart image out.
interface = gr.Interface(
    title="UC Davis Health Query Assistant",
    description="Ask your questions about UC Davis Health services and get relevant information from their website.",
    fn=gradio_query,
    inputs=gr.Textbox(placeholder="Enter your question here...", lines=2),
    outputs=[
        gr.Textbox(label="Answer"),
        gr.Textbox(label="For more information, visit these links"),
        gr.Image(elem_id="bar_chart", label="Similarity Scores Bar Chart", type="filepath"),
    ],
    css=".gradio-container #bar_chart img {width: 200%; height: auto;}",
)

# NOTE(review): this runs at import time, and share=True asks Gradio for a
# publicly reachable link - confirm both are intended for this deployment.
interface.launch(share=True)