Spaces:
Build error
Build error
Reformatted code; updated linting
Browse files
- app.py +4 -47
- chatbot/__init__.py +0 -0
- chatbot/utils.py +50 -0
- setup.cfg +2 -1
- shell/format.sh +3 -3
- shell/lint.sh +3 -3
app.py
CHANGED
|
@@ -1,14 +1,8 @@
|
|
| 1 |
import json
|
| 2 |
-
import os
|
| 3 |
|
| 4 |
import streamlit as st
|
| 5 |
-
from
|
| 6 |
-
from
|
| 7 |
-
from llama_index import SimpleDirectoryReader
|
| 8 |
-
from llama_index import VectorStoreIndex
|
| 9 |
-
from llama_index import set_global_service_context
|
| 10 |
-
from llama_index.embeddings import OpenAIEmbedding
|
| 11 |
-
from llama_index.llms import AzureOpenAI
|
| 12 |
|
| 13 |
# Initialize message history
|
| 14 |
st.header("Chat with André's research 💬 📚")
|
|
@@ -21,47 +15,10 @@ with open(r"config.json") as config_file:
|
|
| 21 |
config_details = json.load(config_file)
|
| 22 |
|
| 23 |
|
| 24 |
-
def download_test_data():
|
| 25 |
-
url = "https://drive.google.com/drive/folders/1uDSAWtLvp1YPzfXUsK_v6DeWta16pq6y"
|
| 26 |
-
with st.spinner(text="Downloading test data. Might take a few seconds."):
|
| 27 |
-
download_folder(url, quiet=True, use_cookies=False, output="./data/")
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
@st.cache_resource(show_spinner=False)
|
| 31 |
-
def load_data():
|
| 32 |
-
with st.spinner(text="Loading and indexing the provided dataset – hang tight! This may take a few seconds."):
|
| 33 |
-
documents = SimpleDirectoryReader(input_dir="./data", recursive=True).load_data()
|
| 34 |
-
llm = AzureOpenAI(
|
| 35 |
-
model="gpt-3.5-turbo",
|
| 36 |
-
engine=config_details["ENGINE"],
|
| 37 |
-
temperature=0.5,
|
| 38 |
-
api_key=os.getenv("OPENAI_API_KEY"),
|
| 39 |
-
api_base=config_details["OPENAI_API_BASE"],
|
| 40 |
-
api_type="azure",
|
| 41 |
-
api_version=config_details["OPENAI_API_VERSION"],
|
| 42 |
-
system_prompt="You are an expert on André's research and your job is to answer"
|
| 43 |
-
"technical questions. Assume that all questions are related to"
|
| 44 |
-
"André's research. Keep your answers technical and based on facts"
|
| 45 |
-
" – do not hallucinate features.",
|
| 46 |
-
)
|
| 47 |
-
# You need to deploy your own embedding model as well as your own chat completion model
|
| 48 |
-
embed_model = OpenAIEmbedding(
|
| 49 |
-
model="text-embedding-ada-002",
|
| 50 |
-
deployment_name=config_details["ENGINE_EMBEDDING"],
|
| 51 |
-
api_key=os.getenv("OPENAI_API_KEY"),
|
| 52 |
-
api_base=config_details["OPENAI_API_BASE"],
|
| 53 |
-
api_type="azure",
|
| 54 |
-
api_version=config_details["OPENAI_API_VERSION"],
|
| 55 |
-
)
|
| 56 |
-
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
|
| 57 |
-
set_global_service_context(service_context)
|
| 58 |
-
index = VectorStoreIndex.from_documents(documents) # , service_context=service_context)
|
| 59 |
-
return index
|
| 60 |
-
|
| 61 |
-
|
| 62 |
def main():
|
|
|
|
| 63 |
download_test_data()
|
| 64 |
-
index = load_data()
|
| 65 |
chat_engine = index.as_chat_engine(chat_mode="condense_question", verbose=True)
|
| 66 |
|
| 67 |
if prompt := st.chat_input("Your question"): # Prompt for user input and save to chat history
|
|
|
|
| 1 |
import json
|
|
|
|
| 2 |
|
| 3 |
import streamlit as st
|
| 4 |
+
from src.utils import download_test_data
|
| 5 |
+
from src.utils import load_data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
# Initialize message history
|
| 8 |
st.header("Chat with André's research 💬 📚")
|
|
|
|
| 15 |
config_details = json.load(config_file)
|
| 16 |
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
def main():
|
| 19 |
+
# setup dataset
|
| 20 |
download_test_data()
|
| 21 |
+
index = load_data(config_details)
|
| 22 |
chat_engine = index.as_chat_engine(chat_mode="condense_question", verbose=True)
|
| 23 |
|
| 24 |
if prompt := st.chat_input("Your question"): # Prompt for user input and save to chat history
|
chatbot/__init__.py
ADDED
|
File without changes
|
chatbot/utils.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
|
| 3 |
+
import streamlit as st
|
| 4 |
+
from gdown import download_folder
|
| 5 |
+
from llama_index import ServiceContext
|
| 6 |
+
from llama_index import SimpleDirectoryReader
|
| 7 |
+
from llama_index import VectorStoreIndex
|
| 8 |
+
from llama_index import set_global_service_context
|
| 9 |
+
from llama_index.embeddings import OpenAIEmbedding
|
| 10 |
+
from llama_index.llms import AzureOpenAI
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
@st.cache_resource(show_spinner=False)
|
| 14 |
+
def download_test_data():
|
| 15 |
+
# url = f"https://drive.google.com/drive/folders/uc?export=download&confirm=pbef&id={file_id}"
|
| 16 |
+
url = "https://drive.google.com/drive/folders/1uDSAWtLvp1YPzfXUsK_v6DeWta16pq6y"
|
| 17 |
+
with st.spinner(text="Downloading test data. Might take a few seconds."):
|
| 18 |
+
download_folder(url=url, quiet=False, use_cookies=False, output="./data/")
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
@st.cache_resource(show_spinner=False)
|
| 22 |
+
def load_data(config_details):
|
| 23 |
+
with st.spinner(text="Loading and indexing the provided dataset – hang tight! This may take a few seconds."):
|
| 24 |
+
documents = SimpleDirectoryReader(input_dir="./data", recursive=True).load_data()
|
| 25 |
+
llm = AzureOpenAI(
|
| 26 |
+
model="gpt-3.5-turbo",
|
| 27 |
+
engine=config_details["ENGINE"],
|
| 28 |
+
temperature=0.5,
|
| 29 |
+
api_key=os.getenv("OPENAI_API_KEY"),
|
| 30 |
+
api_base=config_details["OPENAI_API_BASE"],
|
| 31 |
+
api_type="azure",
|
| 32 |
+
api_version=config_details["OPENAI_API_VERSION"],
|
| 33 |
+
system_prompt="You are an expert on André's research and your job is to answer"
|
| 34 |
+
"technical questions. Assume that all questions are related to"
|
| 35 |
+
"André's research. Keep your answers technical and based on facts"
|
| 36 |
+
" – do not hallucinate features.",
|
| 37 |
+
)
|
| 38 |
+
# You need to deploy your own embedding model as well as your own chat completion model
|
| 39 |
+
embed_model = OpenAIEmbedding(
|
| 40 |
+
model="text-embedding-ada-002",
|
| 41 |
+
deployment_name=config_details["ENGINE_EMBEDDING"],
|
| 42 |
+
api_key=os.getenv("OPENAI_API_KEY"),
|
| 43 |
+
api_base=config_details["OPENAI_API_BASE"],
|
| 44 |
+
api_type="azure",
|
| 45 |
+
api_version=config_details["OPENAI_API_VERSION"],
|
| 46 |
+
)
|
| 47 |
+
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
|
| 48 |
+
set_global_service_context(service_context)
|
| 49 |
+
index = VectorStoreIndex.from_documents(documents) # , service_context=service_context)
|
| 50 |
+
return index
|
setup.cfg
CHANGED
|
@@ -3,7 +3,7 @@ description-file = README.md
|
|
| 3 |
|
| 4 |
[isort]
|
| 5 |
force_single_line=True
|
| 6 |
-
known_first_party=
|
| 7 |
line_length=120
|
| 8 |
profile=black
|
| 9 |
|
|
@@ -12,3 +12,4 @@ profile=black
|
|
| 12 |
per-file-ignores=*__init__.py:F401
|
| 13 |
ignore=E203,W503,W605,F632,E266,E731,E712,E741
|
| 14 |
max-line-length=120
|
|
|
|
|
|
| 3 |
|
| 4 |
[isort]
|
| 5 |
force_single_line=True
|
| 6 |
+
known_first_party=chatbot
|
| 7 |
line_length=120
|
| 8 |
profile=black
|
| 9 |
|
|
|
|
| 12 |
per-file-ignores=*__init__.py:F401
|
| 13 |
ignore=E203,W503,W605,F632,E266,E731,E712,E741
|
| 14 |
max-line-length=120
|
| 15 |
+
exclude=venv/
|
shell/format.sh
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
#!/bin/bash
|
| 2 |
-
isort --sl
|
| 3 |
-
black --line-length 120
|
| 4 |
-
flake8
|
|
|
|
| 1 |
#!/bin/bash
|
| 2 |
+
isort --sl .
|
| 3 |
+
black --line-length 120 .
|
| 4 |
+
flake8 .
|
shell/lint.sh
CHANGED
|
@@ -1,19 +1,19 @@
|
|
| 1 |
#!/bin/bash
|
| 2 |
-
isort --check --sl -c
|
| 3 |
if ! [ $? -eq 0 ]
|
| 4 |
then
|
| 5 |
echo "Please run \"sh shell/format.sh\" to format the code."
|
| 6 |
exit 1
|
| 7 |
fi
|
| 8 |
echo "no issues with isort"
|
| 9 |
-
flake8
|
| 10 |
if ! [ $? -eq 0 ]
|
| 11 |
then
|
| 12 |
echo "Please fix the code style issue."
|
| 13 |
exit 1
|
| 14 |
fi
|
| 15 |
echo "no issues with flake8"
|
| 16 |
-
black --check --line-length 120
|
| 17 |
if ! [ $? -eq 0 ]
|
| 18 |
then
|
| 19 |
echo "Please run \"sh shell/format.sh\" to format the code."
|
|
|
|
| 1 |
#!/bin/bash
|
| 2 |
+
isort --check --sl -c .
|
| 3 |
if ! [ $? -eq 0 ]
|
| 4 |
then
|
| 5 |
echo "Please run \"sh shell/format.sh\" to format the code."
|
| 6 |
exit 1
|
| 7 |
fi
|
| 8 |
echo "no issues with isort"
|
| 9 |
+
flake8 .
|
| 10 |
if ! [ $? -eq 0 ]
|
| 11 |
then
|
| 12 |
echo "Please fix the code style issue."
|
| 13 |
exit 1
|
| 14 |
fi
|
| 15 |
echo "no issues with flake8"
|
| 16 |
+
black --check --line-length 120 .
|
| 17 |
if ! [ $? -eq 0 ]
|
| 18 |
then
|
| 19 |
echo "Please run \"sh shell/format.sh\" to format the code."
|