anasmkh committed
Commit dc35573 · verified · 1 Parent(s): 8ac1d4d

Update app.py

Files changed (1)
  1. app.py +26 -22
app.py CHANGED

```diff
@@ -3,19 +3,20 @@ from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.vectorstores import Qdrant
 from langchain.text_splitter import CharacterTextSplitter
 from langchain.chains import ConversationalRetrievalChain
-from langchain.llms import OpenAI
 from langchain.memory import ConversationBufferMemory
 from qdrant_client import QdrantClient
 from qdrant_client.http import models as qdrant_models
 import os
 from langchain.document_loaders import PyPDFLoader
 import gradio as gr
+from langchain_qdrant import QdrantVectorStore, FastEmbedSparse, RetrievalMode
 
+# Set OpenAI API Key
 openai_api_key = os.getenv('OPENAI_API_KEY')
-openai_api_key = openai_api_key
+os.environ["OPENAI_API_KEY"] = openai_api_key
 
+# Load PDF documents
 pdf_folder_path = "files"
-
 documents = []
 for filename in os.listdir(pdf_folder_path):
     if filename.endswith(".pdf"):
```
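The import of the hybrid-retrieval helpers now sits with the other imports, and the old no-op self-assignment (`openai_api_key = openai_api_key`) becomes an explicit write into the environment. One caveat with that new line: `os.environ` values must be strings, so if `OPENAI_API_KEY` is unset, `os.getenv` returns `None` and the assignment raises a `TypeError`. A minimal defensive sketch (the error message is illustrative):

```python
import os

# Fail fast with a readable error instead of a TypeError from os.environ,
# whose values must be strings (os.getenv returns None for a missing key).
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError("OPENAI_API_KEY is not set; export it before launching the app.")
os.environ["OPENAI_API_KEY"] = openai_api_key
```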
```diff
@@ -23,24 +24,25 @@ for filename in os.listdir(pdf_folder_path):
         loader = PyPDFLoader(file_path)
         documents.extend(loader.load())
 
+# Split the documents into manageable chunks
 text_splitter = CharacterTextSplitter(chunk_size=512, chunk_overlap=25)
 docs = text_splitter.split_documents(documents)
 
+# Initialize embeddings and Qdrant client
 embeddings = OpenAIEmbeddings()
-
 qdrant_client = QdrantClient(":memory:")
 
+# Recreate Qdrant collection
 qdrant_client.recreate_collection(
     collection_name="langchain_collection",
     vectors_config=qdrant_models.VectorParams(size=1536, distance=qdrant_models.Distance.COSINE)
 )
 
-from langchain_qdrant import QdrantVectorStore,FastEmbedSparse,RetrievalMode
-
+# Set up the sparse embeddings for hybrid retrieval
 sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")
 
-
-vector_store = QdrantVectorStore.from_documents(
+# Initialize the vector store with hybrid retrieval mode
+vector_store = QdrantVectorStore.from_documents(
     docs,
     embedding=embeddings,
     sparse_embedding=sparse_embeddings,
```
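The `vector_store = QdrantVectorStore.from_documents(` call opened at the end of this hunk continues in the next one. Taken together, the change moves the store onto `langchain_qdrant`'s hybrid (dense + sparse) mode, with `Qdrant/bm25` sparse vectors via FastEmbed. Two notes: `size=1536` in `recreate_collection` matches the `OpenAIEmbeddings` default (text-embedding-ada-002), but `from_documents` provisions its own collection, including the sparse-vector schema that `RetrievalMode.HYBRID` requires, so the manual dense-only `recreate_collection` above it is likely redundant (and newer qdrant-client releases deprecate `recreate_collection` anyway). A sketch of the hybrid setup on its own; `location=":memory:"` is an assumption, since the connection arguments sit in unchanged context lines the diff hides:

```python
from langchain_qdrant import QdrantVectorStore, FastEmbedSparse, RetrievalMode

# from_documents creates the collection itself, with both the dense and the
# sparse (BM25) vector configurations that HYBRID retrieval requires.
vector_store = QdrantVectorStore.from_documents(
    docs,
    embedding=embeddings,
    sparse_embedding=FastEmbedSparse(model_name="Qdrant/bm25"),
    location=":memory:",  # assumed; not visible in the diff
    collection_name="langchain_collection",
    retrieval_mode=RetrievalMode.HYBRID,
)
```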
```diff
@@ -48,32 +50,33 @@ vector_store = QdrantVectorStore.from_documents(
     collection_name="langchain_collection",
     retrieval_mode=RetrievalMode.HYBRID,
 )
+
+# Set up conversational memory
 memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
 
-retriever = vector_store.as_retriever()
-# llm = OpenAI(temperature=0.4)
+# Set up the retriever
+retriever = vector_store.as_retriever(search_type="hybrid", search_kwargs={"k": 3})
+
+# Set up the language model
 llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.3)
+
+# Set up the conversational retrieval chain with memory
 conversational_chain = ConversationalRetrievalChain.from_llm(
     llm=llm,
     retriever=retriever,
-    memory=memory
+    memory=memory,
+    verbose=True
 )
 
-query = "What is COMVIVA CDR"
-response = conversational_chain.invoke({"question": query})
-print(response['answer'])
-
-
-
 
 def chat_with_ai(user_input, chat_history):
-    response = conversational_chain.invoke({"question":user_input})
-
-    chat_history.append((user_input, str(response['answer'])))
+    response = conversational_chain({"question": user_input})
+
+    chat_history.append((user_input, response['answer']))
 
     return chat_history, ""
 
-
+# Gradio interface
 def gradio_chatbot():
     with gr.Blocks() as demo:
         gr.Markdown("# Chat Interface for Langchain")
```
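Two things in this hunk deserve a flag. First, `ChatOpenAI` is called but never imported in either version of the file, so the script should fail with a `NameError` here. Second, `as_retriever(search_type="hybrid", ...)` passes a value LangChain's `VectorStoreRetriever` rejects (its allowed types are `similarity`, `similarity_score_threshold`, and `mmr`); hybrid retrieval is already selected on the store through `retrieval_mode`, so it does not need restating on the retriever. A sketch of both fixes, assuming the legacy `langchain` package layout the rest of the file uses:

```python
from langchain.chat_models import ChatOpenAI  # missing from the file's imports

# Hybrid vs. dense retrieval is decided by the store's retrieval_mode;
# the retriever itself only needs search_kwargs such as k.
retriever = vector_store.as_retriever(search_kwargs={"k": 3})
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.3)
```

Separately, the new `chat_with_ai` swaps the chain's `.invoke(...)` call for the deprecated `__call__` form `conversational_chain({...})`; both work on legacy chains, but `.invoke` is the forward-compatible spelling.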
```diff
@@ -87,10 +90,11 @@ def gradio_chatbot():
 
         chat_history = gr.State([])
 
+        # Bind button and textbox to chat function
         submit_button.click(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
-
         user_input.submit(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
 
     return demo
 
+# Launch Gradio interface
 gradio_chatbot().launch(debug=True)
```
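A robustness note on the Gradio wiring: `chat_with_ai` receives `chat_history` from a `gr.State` input but only writes to the `chatbot` output, relying on in-place mutation of the list to keep the state current. That works, but returning the state explicitly makes the data flow visible. A sketch of that variant, assuming the surrounding names from `app.py`:

```python
def chat_with_ai(user_input, chat_history):
    response = conversational_chain.invoke({"question": user_input})
    chat_history.append((user_input, response["answer"]))
    # History goes out twice: once to the visible Chatbot, once back into
    # the gr.State; the empty string clears the input textbox.
    return chat_history, chat_history, ""

submit_button.click(
    chat_with_ai,
    inputs=[user_input, chat_history],
    outputs=[chatbot, chat_history, user_input],
)
```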
 