Spaces:
Sleeping
Sleeping
streamlit app
Browse files
- chatbot.py +14 -43
- database.py +2 -2
- main.py +47 -0
- preprocess.py +4 -5
- requirements.txt +3 -0
chatbot.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
from langchain.prompts import PromptTemplate
|
2 |
-
from langchain
|
3 |
-
from langchain.embeddings import OpenAIEmbeddings
|
4 |
from langchain.chains import LLMChain
|
5 |
from langchain.memory import ConversationBufferMemory
|
6 |
from redis.commands.search.query import Query
|
@@ -8,46 +7,25 @@ import time
|
|
8 |
import os
|
9 |
from dotenv import load_dotenv
|
10 |
import numpy as np
|
11 |
-
from database import redis_conn
|
12 |
-
|
13 |
load_dotenv()
|
|
|
|
|
|
|
|
|
14 |
|
15 |
-
llm = OpenAI(model_name="gpt-3.5-turbo", temperature=0.3, openai_api_key=os.getenv('OPENAI_API_KEY'))
|
16 |
prompt = PromptTemplate(
|
17 |
input_variables=["product_description"],
|
18 |
template="Create comma seperated product keywords to perform a query on a amazon dataset for this user input: {product_description}",
|
19 |
)
|
20 |
|
21 |
-
chain = LLMChain(llm=
|
22 |
-
|
23 |
-
userinput = input("Hey im a E-commerce Chatbot, how can i help you today? ")
|
24 |
-
print("User:", userinput)
|
25 |
-
# Run the chain only specifying the input variable.
|
26 |
-
keywords = chain.run(userinput)
|
27 |
-
|
28 |
-
embedding_model = OpenAIEmbeddings(openai_api_key=os.getenv('OPENAI_API_KEY'))
|
29 |
-
#vectorize the query
|
30 |
-
query_vector = embedding_model.embed_query(keywords)
|
31 |
-
query_vector = np.array(query_vector).astype(np.float32).tobytes()
|
32 |
-
|
33 |
-
|
34 |
-
#prepare the query
|
35 |
-
ITEM_KEYWORD_EMBEDDING_FIELD = 'item_vector'
|
36 |
-
topK=5
|
37 |
-
q = Query(f'*=>[KNN {topK} @{ITEM_KEYWORD_EMBEDDING_FIELD} $vec_param AS vector_score]').sort_by('vector_score').paging(0,topK).return_fields('vector_score','item_name','item_id','item_keywords').dialect(2)
|
38 |
-
params_dict = {"vec_param": query_vector}
|
39 |
-
#Execute the query
|
40 |
-
results = redis_conn.ft().search(q, query_params = params_dict)
|
41 |
-
|
42 |
-
full_result_string = ''
|
43 |
-
for product in results.docs:
|
44 |
-
full_result_string += product.item_name + ' ' + product.item_keywords + ' ' + product.item_id + "\n\n\n"
|
45 |
|
46 |
# code The response
|
47 |
-
|
|
|
48 |
|
49 |
{chat_history}
|
50 |
-
|
51 |
Chatbot:"""
|
52 |
|
53 |
prompt = PromptTemplate(
|
@@ -55,22 +33,15 @@ prompt = PromptTemplate(
|
|
55 |
template=template
|
56 |
)
|
57 |
memory = ConversationBufferMemory(memory_key="chat_history")
|
|
|
58 |
llm_chain = LLMChain(
|
59 |
-
llm=
|
60 |
prompt=prompt,
|
61 |
verbose=False,
|
62 |
memory=memory,
|
63 |
)
|
64 |
|
65 |
-
answer = llm_chain.predict(user_msg=f"{full_result_string} ---\n\n {userinput}")
|
66 |
-
print("Bot:", answer)
|
67 |
-
time.sleep(0.5)
|
68 |
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
answer = llm_chain.predict(
|
73 |
-
user_msg=follow_up
|
74 |
-
)
|
75 |
-
print("Bot:", answer)
|
76 |
-
time.sleep(0.5)
|
|
|
1 |
from langchain.prompts import PromptTemplate
|
2 |
+
from langchain import HuggingFaceHub
|
|
|
3 |
from langchain.chains import LLMChain
|
4 |
from langchain.memory import ConversationBufferMemory
|
5 |
from redis.commands.search.query import Query
|
|
|
7 |
import os
|
8 |
from dotenv import load_dotenv
|
9 |
import numpy as np
|
|
|
|
|
10 |
load_dotenv()
|
11 |
+
HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')
|
12 |
+
repo_id = 'tiiuae/falcon-7b-instruct'
|
13 |
+
|
14 |
+
falcon_llm_1 = HuggingFaceHub(repo_id = repo_id, model_kwargs={'temperature':0.1,'max_new_tokens':500},huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN)
|
15 |
|
|
|
16 |
prompt = PromptTemplate(
|
17 |
input_variables=["product_description"],
|
18 |
template="Create comma seperated product keywords to perform a query on a amazon dataset for this user input: {product_description}",
|
19 |
)
|
20 |
|
21 |
+
chain = LLMChain(llm=falcon_llm_1, prompt=prompt)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
# code The response
|
24 |
+
repo_id_2 = 'tiiuae/falcon-7b'
|
25 |
+
template = """You are a salesman. Be kind, detailed and nice. take the given context and Present the given queried search result in a nice way as answer to the user_msg. dont ask questions back or freestyle and invent followup conversation! just
|
26 |
|
27 |
{chat_history}
|
28 |
+
{user_msg}
|
29 |
Chatbot:"""
|
30 |
|
31 |
prompt = PromptTemplate(
|
|
|
33 |
template=template
|
34 |
)
|
35 |
memory = ConversationBufferMemory(memory_key="chat_history")
|
36 |
+
|
37 |
llm_chain = LLMChain(
|
38 |
+
llm = HuggingFaceHub(repo_id = repo_id_2, model_kwargs={'temperature':0.8,'max_new_tokens':500}),
|
39 |
prompt=prompt,
|
40 |
verbose=False,
|
41 |
memory=memory,
|
42 |
)
|
43 |
|
|
|
|
|
|
|
44 |
|
45 |
+
|
46 |
+
|
47 |
+
|
|
|
|
|
|
|
|
|
|
database.py
CHANGED
@@ -8,8 +8,8 @@ redis_key = os.getenv('REDIS_KEY')
|
|
8 |
|
9 |
|
10 |
redis_conn = redis.Redis(
|
11 |
-
host='redis-
|
12 |
-
port=
|
13 |
password=redis_key)
|
14 |
|
15 |
print('connected to redis')
|
|
|
8 |
|
9 |
|
10 |
redis_conn = redis.Redis(
|
11 |
+
host='redis-12882.c259.us-central1-2.gce.cloud.redislabs.com',
|
12 |
+
port=12882,
|
13 |
password=redis_key)
|
14 |
|
15 |
print('connected to redis')
|
main.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from chatbot import llm_chain, chain
|
3 |
+
from sentence_transformers import SentenceTransformer
|
4 |
+
from redis.commands.search.query import Query
|
5 |
+
from database import redis_conn
|
6 |
+
import numpy as np
|
7 |
+
|
8 |
+
|
9 |
+
|
10 |
+
st.title('My Amazon shopping buddy 🏷️')
|
11 |
+
st.caption('🤖 Powered by Falcon Open Source AI model')
|
12 |
+
st.session_state['disabled']= False
|
13 |
+
|
14 |
+
if "messages" not in st.session_state:
|
15 |
+
st.session_state["messages"] = [{"role": "assistant", "content": "Hey im your online shopping buddy, how can i help you today?"}]
|
16 |
+
for msg in st.session_state["messages"]:
|
17 |
+
st.chat_message(msg["role"]).write(msg["content"])
|
18 |
+
|
19 |
+
prompt = st.chat_input(key="user_input",disabled=st.session_state.disabled )
|
20 |
+
embedding_model = SentenceTransformer('sentence-transformers/all-distilroberta-v1')
|
21 |
+
if prompt:
|
22 |
+
st.session_state["messages"].append({"role": "user", "content": prompt})
|
23 |
+
st.chat_message('user').write(prompt)
|
24 |
+
st.session_state.disabled = True
|
25 |
+
keywords = chain.run(prompt)
|
26 |
+
|
27 |
+
#vectorize the query
|
28 |
+
query_vector = embedding_model.encode(keywords)
|
29 |
+
query_vector = np.array(query_vector).astype(np.float32).tobytes()
|
30 |
+
#prepare the query
|
31 |
+
ITEM_KEYWORD_EMBEDDING_FIELD = 'item_vector'
|
32 |
+
topK=5
|
33 |
+
q = Query(f'*=>[KNN {topK} @{ITEM_KEYWORD_EMBEDDING_FIELD} $vec_param AS vector_score]').sort_by('vector_score').paging(0,topK).return_fields('vector_score','item_name','item_id','item_keywords').dialect(2)
|
34 |
+
params_dict = {"vec_param": query_vector}
|
35 |
+
#Execute the query
|
36 |
+
results = redis_conn.ft().search(q, query_params = params_dict)
|
37 |
+
|
38 |
+
full_result_string = ''
|
39 |
+
for product in results.docs:
|
40 |
+
full_result_string += product.item_name + ' ' + product.item_keywords + "\n\n\n"
|
41 |
+
|
42 |
+
result = llm_chain.predict(user_msg=f"{full_result_string} ---\n\n {prompt}")
|
43 |
+
st.session_state.messages.append({"role": "assistant", "content": result})
|
44 |
+
st.chat_message('assistant').write(result)
|
45 |
+
|
46 |
+
|
47 |
+
|
preprocess.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
from langchain.embeddings import OpenAIEmbeddings
|
|
|
2 |
import os
|
3 |
import pandas as pd
|
4 |
import numpy as np
|
@@ -6,8 +7,6 @@ from dotenv import load_dotenv
|
|
6 |
from database import redis_conn
|
7 |
from utilities import create_flat_index, load_vectors
|
8 |
|
9 |
-
load_dotenv()
|
10 |
-
openai_api_key = os.getenv("OPENAI_API_KEY")
|
11 |
|
12 |
#set maximum length for text fields
|
13 |
MAX_TEXT_LENGTH = 512
|
@@ -24,12 +23,12 @@ data.reset_index(drop=True, inplace=True)
|
|
24 |
data_metadata = data.head(500).to_dict(orient='index')
|
25 |
|
26 |
#generating embeddings (vectors) for the item keywords
|
27 |
-
|
28 |
-
embedding_model = OpenAIEmbeddings(openai_api_key=openai_api_key)
|
29 |
|
30 |
#get the item keywords attribute for each product and encode them into vector embeddings
|
31 |
item_keywords = [data_metadata[i]['item_keywords'] for i in data_metadata.keys()]
|
32 |
-
item_keywords_vectors = [embedding_model.
|
33 |
|
34 |
TEXT_EMBEDDING_DIMENSION=768
|
35 |
NUMBER_PRODUCTS=500
|
|
|
1 |
from langchain.embeddings import OpenAIEmbeddings
|
2 |
+
from sentence_transformers import SentenceTransformer
|
3 |
import os
|
4 |
import pandas as pd
|
5 |
import numpy as np
|
|
|
7 |
from database import redis_conn
|
8 |
from utilities import create_flat_index, load_vectors
|
9 |
|
|
|
|
|
10 |
|
11 |
#set maximum length for text fields
|
12 |
MAX_TEXT_LENGTH = 512
|
|
|
23 |
data_metadata = data.head(500).to_dict(orient='index')
|
24 |
|
25 |
#generating embeddings (vectors) for the item keywords
|
26 |
+
embedding_model = SentenceTransformer('sentence-transformers/all-distilroberta-v1')
|
27 |
+
# embedding_model = OpenAIEmbeddings(openai_api_key=openai_api_key)
|
28 |
|
29 |
#get the item keywords attribute for each product and encode them into vector embeddings
|
30 |
item_keywords = [data_metadata[i]['item_keywords'] for i in data_metadata.keys()]
|
31 |
+
item_keywords_vectors = [embedding_model.encode(item) for item in item_keywords]
|
32 |
|
33 |
TEXT_EMBEDDING_DIMENSION=768
|
34 |
NUMBER_PRODUCTS=500
|
requirements.txt
CHANGED
@@ -3,3 +3,6 @@ openai == 0.27.8
|
|
3 |
redis == 5.0.1
|
4 |
pandas == 2.0.3
|
5 |
sentence-transformers == 2.2.2
|
|
|
|
|
|
|
|
3 |
redis == 5.0.1
|
4 |
pandas == 2.0.3
|
5 |
sentence-transformers == 2.2.2
|
6 |
+
tiktoken == 0.5.1
|
7 |
+
streamlit == 1.27.2
|
8 |
+
python-dotenv == 1.0.0
|