# VUMC Chatbot — Streamlit app (Hugging Face Spaces deployment)
import os

import streamlit as st
from databricks.vector_search.client import VectorSearchClient
from mlflow import deployments

# Connection settings are injected via environment variables
# (e.g. Space secrets / host config) rather than hard-coded.
DATABRICKS_HOST = os.environ.get("DATABRICKS_HOST")
DATABRICKS_API_TOKEN = os.environ.get("DATABRICKS_API_TOKEN")
VS_ENDPOINT_NAME = os.environ.get("VS_ENDPOINT_NAME")
VS_INDEX_NAME = os.environ.get("VS_INDEX_NAME")

# Fail fast with a clear message when required configuration is missing;
# the vector-search endpoint/index names are used below, so validate them too.
if DATABRICKS_HOST is None:
    raise ValueError("DATABRICKS_HOST environment variable must be set")
if DATABRICKS_API_TOKEN is None:
    raise ValueError("DATABRICKS_API_TOKEN environment variable must be set")
if VS_ENDPOINT_NAME is None:
    raise ValueError("VS_ENDPOINT_NAME environment variable must be set")
if VS_INDEX_NAME is None:
    raise ValueError("VS_INDEX_NAME environment variable must be set")
# UI copy for the page header.
TITLE = "VUMC Chatbot"
DESCRIPTION = "The first generation VUMC chatbot with knowledge of Vanderbilt specific terms."

# Canned prompts surfaced as one-click example buttons in the sidebar.
EXAMPLE_PROMPTS = [
    "Write a short story about a robot that has a nice day.",
    "In a table, what are some of the most common misconceptions about birds?",
    "Give me a recipe for vegan banana bread.",
    "Code a python function that can run merge sort on a list.",
    "Give me the character profile of a gumdrop obsessed knight in JSON.",
    "Write a rap battle between Alan Turing and Claude Shannon.",
]
# ---- Page layout ----
st.set_page_config(layout="wide")
st.title(TITLE)

# NOTE(review): the st.write calls below echo env-var values to the page to
# verify PAT/config wiring works end to end — remove before production, since
# they expose the workspace host to any viewer.
st.markdown(DESCRIPTION)
st.markdown("\n")
st.write(DATABRICKS_HOST)
st.markdown("\n")
st.write(VS_ENDPOINT_NAME)
st.markdown("\n")

# use this to format later
# with open("style.css") as css:
#     st.markdown(f'<style>{css.read()}</style>', unsafe_allow_html=True)
# TODO *** configure to run only on prompt for verification?
# VectorSearchClient picks up credentials from the environment.
vsc = VectorSearchClient()

# Hard-coded smoke-test query; should eventually come from user chat input.
question = "What is the data lake?"

# Embed the question with the Databricks-hosted BGE embedding model.
deploy_client = deployments.get_deploy_client("databricks")
response = deploy_client.predict(
    endpoint="databricks-bge-large-en",
    inputs={"input": [question]},
)
embeddings = [entry['embedding'] for entry in response.data]

# Retrieve the top-5 nearest entries (name/description columns) for the
# query embedding and render the raw result payload for inspection.
results = vsc.get_index(VS_ENDPOINT_NAME, VS_INDEX_NAME).similarity_search(
    query_vector=embeddings[0],
    columns=["name", "description"],
    num_results=5,
)
st.write(results)
# DBRX mainbody minus functions (commented out until handle_user_input /
# MODEL_AVATAR_URL / session-state setup are ported over)
# main = st.container()
# with main:
#     history = st.container(height=400)
#     with history:
#         for message in st.session_state["messages"]:
#             avatar = None
#             if message["role"] == "assistant":
#                 avatar = MODEL_AVATAR_URL
#             with st.chat_message(message["role"], avatar=avatar):
#                 if message["content"] is not None:
#                     st.markdown(message["content"])
#                 if message["error"] is not None:
#                     st.error(message["error"], icon="🚨")
#                 if message["warning"] is not None:
#                     st.warning(message["warning"], icon="⚠️")
#     if prompt := st.chat_input("Type a message!", max_chars=1000):
#         handle_user_input(prompt)
#     st.markdown("\n")  # add some space for iphone users
# with st.sidebar:
#     with st.container():
#         st.title("Examples")
#         for prompt in EXAMPLE_PROMPTS:
#             st.button(prompt, args=(prompt,), on_click=handle_user_input)