RomyMy committed on
Commit
0a5759a
Β·
1 Parent(s): cb85932

streamlit app

Browse files
Files changed (5) hide show
  1. chatbot.py +14 -43
  2. database.py +2 -2
  3. main.py +47 -0
  4. preprocess.py +4 -5
  5. requirements.txt +3 -0
chatbot.py CHANGED
@@ -1,6 +1,5 @@
1
  from langchain.prompts import PromptTemplate
2
- from langchain.llms import OpenAI
3
- from langchain.embeddings import OpenAIEmbeddings
4
  from langchain.chains import LLMChain
5
  from langchain.memory import ConversationBufferMemory
6
  from redis.commands.search.query import Query
@@ -8,46 +7,25 @@ import time
8
  import os
9
  from dotenv import load_dotenv
10
  import numpy as np
11
- from database import redis_conn
12
-
13
  load_dotenv()
 
 
 
 
14
 
15
- llm = OpenAI(model_name="gpt-3.5-turbo", temperature=0.3, openai_api_key=os.getenv('OPENAI_API_KEY'))
16
  prompt = PromptTemplate(
17
  input_variables=["product_description"],
18
  template="Create comma seperated product keywords to perform a query on a amazon dataset for this user input: {product_description}",
19
  )
20
 
21
- chain = LLMChain(llm=llm, prompt=prompt)
22
-
23
- userinput = input("Hey im a E-commerce Chatbot, how can i help you today? ")
24
- print("User:", userinput)
25
- # Run the chain only specifying the input variable.
26
- keywords = chain.run(userinput)
27
-
28
- embedding_model = OpenAIEmbeddings(openai_api_key=os.getenv('OPENAI_API_KEY'))
29
- #vectorize the query
30
- query_vector = embedding_model.embed_query(keywords)
31
- query_vector = np.array(query_vector).astype(np.float32).tobytes()
32
-
33
-
34
- #prepare the query
35
- ITEM_KEYWORD_EMBEDDING_FIELD = 'item_vector'
36
- topK=5
37
- q = Query(f'*=>[KNN {topK} @{ITEM_KEYWORD_EMBEDDING_FIELD} $vec_param AS vector_score]').sort_by('vector_score').paging(0,topK).return_fields('vector_score','item_name','item_id','item_keywords').dialect(2)
38
- params_dict = {"vec_param": query_vector}
39
- #Execute the query
40
- results = redis_conn.ft().search(q, query_params = params_dict)
41
-
42
- full_result_string = ''
43
- for product in results.docs:
44
- full_result_string += product.item_name + ' ' + product.item_keywords + ' ' + product.item_id + "\n\n\n"
45
 
46
  # code The response
47
- template = """You are a chatbot. Be kind, detailed and nice. Present the given queried search result in a nice way as answer to the user input. dont ask questions back! just take the given context
 
48
 
49
  {chat_history}
50
- Human: {user_msg}
51
  Chatbot:"""
52
 
53
  prompt = PromptTemplate(
@@ -55,22 +33,15 @@ prompt = PromptTemplate(
55
  template=template
56
  )
57
  memory = ConversationBufferMemory(memory_key="chat_history")
 
58
  llm_chain = LLMChain(
59
- llm=OpenAI(model_name="gpt-3.5-turbo", temperature=0.8, openai_api_key=os.getenv('OPENAI_API_KEY')),
60
  prompt=prompt,
61
  verbose=False,
62
  memory=memory,
63
  )
64
 
65
- answer = llm_chain.predict(user_msg=f"{full_result_string} ---\n\n {userinput}")
66
- print("Bot:", answer)
67
- time.sleep(0.5)
68
 
69
- while True:
70
- follow_up = input("Anything else you want to ask about this topic?")
71
- print("User:", follow_up)
72
- answer = llm_chain.predict(
73
- user_msg=follow_up
74
- )
75
- print("Bot:", answer)
76
- time.sleep(0.5)
 
1
  from langchain.prompts import PromptTemplate
2
+ from langchain import HuggingFaceHub
 
3
  from langchain.chains import LLMChain
4
  from langchain.memory import ConversationBufferMemory
5
  from redis.commands.search.query import Query
 
7
  import os
8
  from dotenv import load_dotenv
9
  import numpy as np
 
 
10
  load_dotenv()
11
+ HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')
12
+ repo_id = 'tiiuae/falcon-7b-instruct'
13
+
14
+ falcon_llm_1 = HuggingFaceHub(repo_id = repo_id, model_kwargs={'temperature':0.1,'max_new_tokens':500},huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN)
15
 
 
16
  prompt = PromptTemplate(
17
  input_variables=["product_description"],
18
  template="Create comma seperated product keywords to perform a query on a amazon dataset for this user input: {product_description}",
19
  )
20
 
21
+ chain = LLMChain(llm=falcon_llm_1, prompt=prompt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  # code The response
24
+ repo_id_2 = 'tiiuae/falcon-7b'
25
+ template = """You are a salesman. Be kind, detailed and nice. take the given context and Present the given queried search result in a nice way as answer to the user_msg. dont ask questions back or freestyle and invent followup conversation! just
26
 
27
  {chat_history}
28
+ {user_msg}
29
  Chatbot:"""
30
 
31
  prompt = PromptTemplate(
 
33
  template=template
34
  )
35
  memory = ConversationBufferMemory(memory_key="chat_history")
36
+
37
  llm_chain = LLMChain(
38
+ llm = HuggingFaceHub(repo_id = repo_id_2, model_kwargs={'temperature':0.8,'max_new_tokens':500}),
39
  prompt=prompt,
40
  verbose=False,
41
  memory=memory,
42
  )
43
 
 
 
 
44
 
45
+
46
+
47
+
 
 
 
 
 
database.py CHANGED
@@ -8,8 +8,8 @@ redis_key = os.getenv('REDIS_KEY')
8
 
9
 
10
  redis_conn = redis.Redis(
11
- host='redis-10923.c10.us-east-1-4.ec2.cloud.redislabs.com',
12
- port=10923,
13
  password=redis_key)
14
 
15
  print('connected to redis')
 
8
 
9
 
10
  redis_conn = redis.Redis(
11
+ host='redis-12882.c259.us-central1-2.gce.cloud.redislabs.com',
12
+ port=12882,
13
  password=redis_key)
14
 
15
  print('connected to redis')
main.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
from chatbot import llm_chain, chain
from sentence_transformers import SentenceTransformer
from redis.commands.search.query import Query
from database import redis_conn
import numpy as np

# Streamlit chat front-end: takes a shopping request, asks the keyword LLM
# chain for search keywords, runs a KNN vector search against the Redis
# product index, and has the sales LLM chain present the matches.

# Name of the vector field in the Redis index and number of neighbours.
ITEM_KEYWORD_EMBEDDING_FIELD = 'item_vector'
TOP_K = 5


@st.cache_resource
def _load_embedding_model():
    """Load the sentence-transformer once per process.

    Without caching, the model was re-instantiated on every Streamlit
    rerun (i.e. on every user interaction), which is expensive.
    """
    return SentenceTransformer('sentence-transformers/all-distilroberta-v1')


def _search_products(keywords: str) -> str:
    """Embed *keywords*, run a top-K KNN query on Redis and return the
    matching products as one context string for the answer chain."""
    vec = _load_embedding_model().encode(keywords)
    vec_bytes = np.array(vec).astype(np.float32).tobytes()
    q = (
        Query(f'*=>[KNN {TOP_K} @{ITEM_KEYWORD_EMBEDDING_FIELD} $vec_param AS vector_score]')
        .sort_by('vector_score')
        .paging(0, TOP_K)
        .return_fields('vector_score', 'item_name', 'item_id', 'item_keywords')
        .dialect(2)
    )
    results = redis_conn.ft().search(q, query_params={"vec_param": vec_bytes})
    # str.join instead of += concatenation in a loop.
    return ''.join(
        product.item_name + ' ' + product.item_keywords + "\n\n\n"
        for product in results.docs
    )


st.title('My Amazon shopping buddy 🏷️')
st.caption('🤖 Powered by Falcon Open Source AI model')
# NOTE(review): this resets 'disabled' on every rerun, so the chat input is
# never actually disabled even though it is set to True below. Kept as-is to
# preserve existing behaviour; consider initialising it only when absent.
st.session_state['disabled'] = False

if "messages" not in st.session_state:
    st.session_state["messages"] = [{"role": "assistant", "content": "Hey im your online shopping buddy, how can i help you today?"}]
# Replay the conversation so far on each rerun.
for msg in st.session_state["messages"]:
    st.chat_message(msg["role"]).write(msg["content"])

prompt = st.chat_input(key="user_input", disabled=st.session_state.disabled)
if prompt:
    st.session_state["messages"].append({"role": "user", "content": prompt})
    st.chat_message('user').write(prompt)
    st.session_state.disabled = True

    # Turn the free-text request into comma-separated search keywords.
    keywords = chain.run(prompt)
    full_result_string = _search_products(keywords)

    # Let the sales chain phrase the retrieved context as the answer.
    result = llm_chain.predict(user_msg=f"{full_result_string} ---\n\n {prompt}")
    st.session_state.messages.append({"role": "assistant", "content": result})
    st.chat_message('assistant').write(result)
preprocess.py CHANGED
@@ -1,4 +1,5 @@
1
  from langchain.embeddings import OpenAIEmbeddings
 
2
  import os
3
  import pandas as pd
4
  import numpy as np
@@ -6,8 +7,6 @@ from dotenv import load_dotenv
6
  from database import redis_conn
7
  from utilities import create_flat_index, load_vectors
8
 
9
- load_dotenv()
10
- openai_api_key = os.getenv("OPENAI_API_KEY")
11
 
12
  #set maximum length for text fields
13
  MAX_TEXT_LENGTH = 512
@@ -24,12 +23,12 @@ data.reset_index(drop=True, inplace=True)
24
  data_metadata = data.head(500).to_dict(orient='index')
25
 
26
  #generating embeddings (vectors) for the item keywords
27
- # embedding_model = SentenceTransformer('sentence-transformers/all-distilroberta-v1')
28
- embedding_model = OpenAIEmbeddings(openai_api_key=openai_api_key)
29
 
30
  #get the item keywords attribute for each product and encode them into vector embeddings
31
  item_keywords = [data_metadata[i]['item_keywords'] for i in data_metadata.keys()]
32
- item_keywords_vectors = [embedding_model.embed_query(item) for item in item_keywords]
33
 
34
  TEXT_EMBEDDING_DIMENSION=768
35
  NUMBER_PRODUCTS=500
 
1
  from langchain.embeddings import OpenAIEmbeddings
2
+ from sentence_transformers import SentenceTransformer
3
  import os
4
  import pandas as pd
5
  import numpy as np
 
7
  from database import redis_conn
8
  from utilities import create_flat_index, load_vectors
9
 
 
 
10
 
11
  #set maximum length for text fields
12
  MAX_TEXT_LENGTH = 512
 
23
  data_metadata = data.head(500).to_dict(orient='index')
24
 
25
  #generating embeddings (vectors) for the item keywords
26
+ embedding_model = SentenceTransformer('sentence-transformers/all-distilroberta-v1')
27
+ # embedding_model = OpenAIEmbeddings(openai_api_key=openai_api_key)
28
 
29
  #get the item keywords attribute for each product and encode them into vector embeddings
30
  item_keywords = [data_metadata[i]['item_keywords'] for i in data_metadata.keys()]
31
+ item_keywords_vectors = [embedding_model.encode(item) for item in item_keywords]
32
 
33
  TEXT_EMBEDDING_DIMENSION=768
34
  NUMBER_PRODUCTS=500
requirements.txt CHANGED
@@ -3,3 +3,6 @@ openai == 0.27.8
3
  redis == 5.0.1
4
  pandas == 2.0.3
5
  sentence-transformers == 2.2.2
 
 
 
 
3
  redis == 5.0.1
4
  pandas == 2.0.3
5
  sentence-transformers == 2.2.2
6
+ tiktoken == 0.5.1
7
+ streamlit == 1.27.2
8
+ python-dotenv == 1.0.0