File size: 2,978 Bytes
d43b410
8bb66b9
 
 
a2a0721
d43b410
0e29746
d43b410
 
a2a0721
d43b410
 
0e29746
d43b410
a2a0721
d43b410
 
 
 
 
 
 
 
 
 
a2a0721
d43b410
 
25b1dfe
 
a2a0721
d43b410
6e06674
 
d43b410
8bb66b9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d43b410
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import os
import streamlit as st
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_databricks.vectorstores import DatabricksVectorSearch

# Connection settings come from the environment so no secrets live in source.
# DATABRICKS_HOST / DATABRICKS_TOKEN authenticate against the workspace;
# VS_ENDPOINT_NAME / VS_INDEX_NAME locate the Vector Search index queried below.
DATABRICKS_HOST = os.environ.get("DATABRICKS_HOST")
DATABRICKS_TOKEN = os.environ.get("DATABRICKS_TOKEN")
VS_ENDPOINT_NAME = os.environ.get("VS_ENDPOINT_NAME")
VS_INDEX_NAME = os.environ.get("VS_INDEX_NAME")

# Fail fast with a clear message rather than an opaque auth error later.
# NOTE: the previous message referenced "DATABRICKS_API_TOKEN", which is not
# the variable this script actually reads -- corrected to DATABRICKS_TOKEN.
for _name, _value in (
    ("DATABRICKS_HOST", DATABRICKS_HOST),
    ("DATABRICKS_TOKEN", DATABRICKS_TOKEN),
):
    if _value is None:
        raise ValueError(f"{_name} environment variable must be set")

# Static page copy, kept as module-level constants so the UI text can be
# tweaked without touching any logic.
TITLE = "VUMC Chatbot"
DESCRIPTION = (
    "The first generation VUMC chatbot with knowledge of Vanderbilt specific terms."
)

# Canned prompts intended as one-click example buttons in the sidebar
# (the sidebar wiring is currently commented out further down).
EXAMPLE_PROMPTS = [
    "Write a short story about a robot that has a nice day.",
    "In a table, what are some of the most common misconceptions about birds?",
    "Give me a recipe for vegan banana bread.",
    "Code a python function that can run merge sort on a list.",
    "Give me the character profile of a gumdrop obsessed knight in JSON.",
    "Write a rap battle between Alan Turing and Claude Shannon.",
]

st.set_page_config(layout="wide")
st.title(TITLE)
st.markdown(DESCRIPTION)
st.markdown("\n")

# Inject the app's custom CSS. Encoding is pinned so rendering does not
# depend on the host's locale default.
with open("style.css", encoding="utf-8") as css:
    st.markdown(f"<style>{css.read()}</style>", unsafe_allow_html=True)


@st.cache_resource
def _load_search_resources():
    """Build the embedding model and Vector Search client once per process.

    Streamlit re-runs this script on every user interaction, so without
    ``st.cache_resource`` the (large) embedding model and the index client
    would be reconstructed on each rerun -- the comment below always asked
    for caching, but nothing actually cached the objects.  ``cache_folder``
    additionally keeps the downloaded weights on disk so a sleeping Space
    does not re-download them on wake.

    Returns:
        tuple[HuggingFaceEmbeddings, DatabricksVectorSearch]: the embedding
        model and the vector store bound to it.
    """
    # Same embedding model we used to create embeddings of terms.
    # make sure we cache this so that it doesnt redownload each time,
    # hindering Space start time if sleeping
    embeddings = HuggingFaceEmbeddings(
        model_name="BAAI/bge-large-en",
        cache_folder="./langchain_cache/",
    )
    vector_store = DatabricksVectorSearch(
        endpoint=VS_ENDPOINT_NAME,
        index_name=VS_INDEX_NAME,
        embedding=embeddings,
        # The index stores term names; descriptions come back as metadata.
        text_column="name",
        columns=["name", "description"],
    )
    return embeddings, vector_store


embeddings, vector_store = _load_search_resources()

# Smoke-test query: renders raw results so we can see the index is reachable.
results = vector_store.similarity_search(query="Tell me about what a data lake is.", k=5)
st.write(results)

# DBRX mainbody minus functions

# main = st.container()
# with main:
#     history = st.container(height=400)
#     with history:
#         for message in st.session_state["messages"]:
#             avatar = None
#             if message["role"] == "assistant":
#                 avatar = MODEL_AVATAR_URL
#             with st.chat_message(message["role"],avatar=avatar):
#                 if message["content"] is not None:
#                     st.markdown(message["content"])
#                 if message["error"] is not None:
#                     st.error(message["error"],icon="🚨")
#                 if message["warning"] is not None:
#                     st.warning(message["warning"],icon="⚠️")

#     if prompt := st.chat_input("Type a message!", max_chars=1000):
#         handle_user_input(prompt)
#     st.markdown("\n") #add some space for iphone users

# with st.sidebar:
#     with st.container():
#         st.title("Examples")
#         for prompt in EXAMPLE_PROMPTS:
#             st.button(prompt, args=(prompt,), on_click=handle_user_input)