basic chatbot
- __pycache__/app.cpython-39.pyc +0 -0
- _rise_faq_db/index.faiss +0 -0
- _rise_faq_db/index.pkl +0 -0
- app.py +41 -5
- requirements.txt +6 -1
- test.py +105 -0
__pycache__/app.cpython-39.pyc CHANGED
Binary files a/__pycache__/app.cpython-39.pyc and b/__pycache__/app.cpython-39.pyc differ
_rise_faq_db/index.faiss ADDED
Binary file (12.3 kB)
_rise_faq_db/index.pkl ADDED
Binary file (6.25 kB)
app.py CHANGED
@@ -3,16 +3,52 @@
 from flask import Flask,request
 from dotenv import load_dotenv
 
-from 
+from langchain.agents import tool
+
 
 # Initializing flask app
 app = Flask(__name__)
 load_dotenv()
 
+
+@tool
+def FAQ(question: str):
+    """Answers the question 1+1"""
+    return 23
+
+tools=[FAQ]
+
+
 @app.route('/', methods=['GET','POST'])
 def index():
-    llm = ChatOpenAI()
-    response=llm.invoke("how can langsmith help with testing?")
-    print(response)
 
-
+    input = {
+        "page_context":"home",
+        "user_summary":"The user is a first year student on BA Architecture",
+        "session_summary":"The user has introduced themselves as Mark Peace and asked how the bot is doing",
+        "user_input":"Can you remind me of my own name?"
+    }
+
+
+    from langchain_openai import ChatOpenAI
+    from langchain.agents import create_openai_functions_agent
+    from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
+    from langchain.agents import AgentExecutor
+
+
+    llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
+
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", "You are a helpful AI bot. Your name is Bob. Please do not answer if you aren't sure of the answer"),
+        ("system", "Here is a summary of the conversation so far: {session_summary}"),
+        ("human", "{user_input}"),
+        MessagesPlaceholder(variable_name="agent_scratchpad")
+    ])
+
+    agent = create_openai_functions_agent(llm, tools, prompt)
+
+    agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
+
+    response=agent_executor.invoke(input)
+
+    return response
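Note on app.py: the new index() imports ChatOpenAI and the agent machinery inside the request handler, rebuilds the agent on every request, shadows the Python builtin input, and returns the raw dict from agent_executor.invoke(). A minimal sketch of the same pattern follows, restructured with module-level imports, a one-time agent build, and only the model's "output" string returned. It assumes the langchain-openai package is installed and OPENAI_API_KEY is in .env; the trimmed payload is illustrative, not from the commit.

# Sketch only: same OpenAI-functions-agent pattern as the diff, restructured.
# Assumes langchain >= 0.1 with langchain-openai installed.
from flask import Flask
from dotenv import load_dotenv
from langchain.agents import tool, create_openai_functions_agent, AgentExecutor
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI

app = Flask(__name__)
load_dotenv()  # expects OPENAI_API_KEY in .env

@tool
def FAQ(question: str):
    """Answers the question 1+1"""
    return 23

tools = [FAQ]

llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful AI bot. Your name is Bob. Please do not answer if you aren't sure of the answer"),
    ("system", "Here is a summary of the conversation so far: {session_summary}"),
    ("human", "{user_input}"),
    MessagesPlaceholder(variable_name="agent_scratchpad"),  # holds intermediate tool calls
])

# Build the agent and executor once at startup, not per request.
agent = create_openai_functions_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

@app.route('/', methods=['GET', 'POST'])
def index():
    payload = {
        "session_summary": "The user has introduced themselves as Mark Peace and asked how the bot is doing",
        "user_input": "Can you remind me of my own name?",
    }
    result = agent_executor.invoke(payload)
    # invoke() returns a dict; the model's answer is under "output".
    return {"response": result["output"]}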
requirements.txt CHANGED
@@ -5,5 +5,10 @@ gunicorn
 python-dotenv
 
 #LLM
+bs4
 langchain
-
+torch
+transformers
+sentence-transformers
+datasets
+faiss-cpu
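Note on requirements.txt: app.py now does `from langchain_openai import ChatOpenAI`, but neither langchain-openai nor openai appears in the updated file, so the app would likely fail at import time on a fresh install. A sketch of the #LLM block with that dependency added (unpinned, matching the file's existing style):

#LLM
bs4
langchain
langchain-openai
torch
transformers
sentence-transformers
datasets
faiss-cpu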
test.py ADDED
@@ -0,0 +1,105 @@
+#import json
+
+from flask import Flask,request
+from dotenv import load_dotenv
+
+from langchain.document_loaders import WebBaseLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+from transformers import AutoTokenizer, AutoModelForQuestionAnswering
+from transformers import AutoTokenizer, pipeline
+from langchain import HuggingFacePipeline
+from langchain.chains import RetrievalQA
+
+# Initializing flask app
+app = Flask(__name__)
+load_dotenv()
+
+@app.route("/train/faq", methods=['GET','POST'])
+def embeddings_faqs():
+
+    data = WebBaseLoader("https://rise.mmu.ac.uk/what-is-rise/").load()
+
+    # Create an instance of the RecursiveCharacterTextSplitter class with specific parameters.
+    # It splits text into chunks of 1000 characters each with a 150-character overlap.
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
+
+    # 'data' holds the text you want to split, split the text into documents using the text splitter.
+    docs = text_splitter.split_documents(data)
+
+    # Define the path to the pre-trained model you want to use
+    modelPath = "sentence-transformers/all-MiniLM-l6-v2"
+
+    # Create a dictionary with model configuration options, specifying to use the CPU for computations
+    model_kwargs = {'device':'cpu'}
+
+    # Create a dictionary with encoding options, specifically setting 'normalize_embeddings' to False
+    encode_kwargs = {'normalize_embeddings': False}
+
+    # Initialize an instance of HuggingFaceEmbeddings with the specified parameters
+    embeddings = HuggingFaceEmbeddings(
+        model_name=modelPath,       # Provide the pre-trained model's path
+        model_kwargs=model_kwargs,  # Pass the model configuration options
+        encode_kwargs=encode_kwargs # Pass the encoding options
+    )
+
+    # Create vectors
+    vectorstore = FAISS.from_documents(docs, embeddings)
+    # Persist the vectors locally on disk
+    vectorstore.save_local("_rise_faq_db");
+
+    return {"trained":"success"}
+
+@app.route('/ask', methods=['GET','POST'])
+def ask():
+    # Specify the model name you want to use
+    model_name = "Intel/dynamic_tinybert"
+
+    # Load the tokenizer associated with the specified model
+    tokenizer = AutoTokenizer.from_pretrained(model_name, padding=True, truncation=True, max_length=512)
+
+    # Define a question-answering pipeline using the model and tokenizer
+    question_answerer = pipeline(
+        "question-answering",
+        model=model_name,
+        tokenizer=tokenizer,
+        return_tensors='pt'
+    )
+
+    # Create an instance of the HuggingFacePipeline, which wraps the question-answering pipeline
+    # with additional model-specific arguments (temperature and max_length)
+    llm = HuggingFacePipeline(
+        pipeline=question_answerer,
+        model_kwargs={"temperature": 0.7, "max_length": 512},
+    )
+
+    # Define the path to the pre-trained model you want to use
+    modelPath = "sentence-transformers/all-MiniLM-l6-v2"
+
+    # Create a dictionary with model configuration options, specifying to use the CPU for computations
+    model_kwargs = {'device':'cpu'}
+
+    # Create a dictionary with encoding options, specifically setting 'normalize_embeddings' to False
+    encode_kwargs = {'normalize_embeddings': False}
+
+    # Initialize an instance of HuggingFaceEmbeddings with the specified parameters
+    embeddings = HuggingFaceEmbeddings(
+        model_name=modelPath,       # Provide the pre-trained model's path
+        model_kwargs=model_kwargs,  # Pass the model configuration options
+        encode_kwargs=encode_kwargs # Pass the encoding options
+    )
+    persisted_vectorstore = FAISS.load_local("_rise_faq_db", embeddings)
+
+    # Create a retriever object from the 'db' using the 'as_retriever' method.
+    # This retriever is likely used for retrieving data or documents from the database.
+    retriever = persisted_vectorstore.as_retriever()
+
+    docs = retriever.get_relevant_documents("What are the benefits?")
+    print(docs[0].page_content)
+
+    return "uip"
+
+@app.route('/', methods=['GET','POST'])
+def index():
+    return {"response":"just some junk response"}
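Note on test.py: the /ask route builds an llm from the question-answering pipeline and imports RetrievalQA, but never uses either; it only prints the top retrieved chunk and returns "uip". LangChain's HuggingFacePipeline wrapper supports text-generation-style tasks rather than extractive QA, so the simplest way to finish the route is to feed the retrieved chunks straight into the transformers QA pipeline. A sketch of how the route could end, reusing the question_answerer and retriever already defined above (the question string is a placeholder); note that recent langchain releases also require allow_dangerous_deserialization=True in FAISS.load_local:

    # Sketch: answer extractively over the retrieved context instead of
    # returning "uip". Reuses question_answerer and retriever from above.
    question = "What are the benefits?"
    docs = retriever.get_relevant_documents(question)
    context = " ".join(d.page_content for d in docs)

    # transformers QA pipelines take question= and context= and return a dict
    # with "answer", "score", "start", and "end".
    answer = question_answerer(question=question, context=context)
    return {"response": answer["answer"]}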