anasmkh committed
Commit 9890c92 · verified · 1 Parent(s): 3d24a6d

Update app.py

Files changed (1): app.py (+2 -15)
app.py CHANGED
@@ -12,11 +12,9 @@ from langchain.document_loaders import PyPDFLoader
 import gradio as gr
 from langchain_qdrant import QdrantVectorStore, FastEmbedSparse, RetrievalMode
 
-# Set OpenAI API Key
 openai_api_key = os.getenv('OPENAI_API_KEY')
 os.environ["OPENAI_API_KEY"] = openai_api_key
 
-# Load PDF documents
 pdf_folder_path = "files"
 documents = []
 for filename in os.listdir(pdf_folder_path):
@@ -25,24 +23,19 @@ for filename in os.listdir(pdf_folder_path):
         loader = PyPDFLoader(file_path)
         documents.extend(loader.load())
 
-# Split the documents into manageable chunks
 text_splitter = CharacterTextSplitter(chunk_size=128, chunk_overlap=5)
 docs = text_splitter.split_documents(documents)
 
-# Initialize embeddings and Qdrant client
 embeddings = OpenAIEmbeddings()
 qdrant_client = QdrantClient(":memory:")
 
-# Recreate Qdrant collection
 qdrant_client.create_collection(
     collection_name="langchain_collection",
     vectors_config=qdrant_models.VectorParams(size=3000, distance=qdrant_models.Distance.COSINE)
 )
 
-# Set up the sparse embeddings for hybrid retrieval
 sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")
 
-# Initialize the vector store with hybrid retrieval mode
 vector_store = QdrantVectorStore.from_documents(
     docs,
     embedding=embeddings,
@@ -52,10 +45,8 @@ vector_store = QdrantVectorStore.from_documents(
     retrieval_mode=RetrievalMode.HYBRID,
 )
 
-# Set up conversational memory
 memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
 
-# Set up the retriever
 retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})
 
 keyword_retriever = BM25Retriever.from_documents(docs)
@@ -65,10 +56,9 @@ ensemble_retriever = EnsembleRetriever(retrievers=[retriever,keyword_retriever],
 
 llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.3)
 
-# Set up the conversational retrieval chain with memory
 conversational_chain = ConversationalRetrievalChain.from_llm(
     llm=llm,
-    retriever=ensemble_retriever,
+    retriever=keyword_retriever,
     memory=memory,
     verbose=True
 )
@@ -81,7 +71,6 @@ def chat_with_ai(user_input, chat_history):
 
     return chat_history, ""
 
-# Gradio interface
 def gradio_chatbot():
     with gr.Blocks() as demo:
         gr.Markdown("# Chat Interface for Langchain")
@@ -95,11 +84,9 @@ def gradio_chatbot():
 
         chat_history = gr.State([])
 
-        # Bind button and textbox to chat function
         submit_button.click(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
         user_input.submit(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
 
     return demo
 
-# Launch Gradio interface
-gradio_chatbot().launch(debug=True)
+gradio_chatbot().launch(debug=True)
 
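For reference, a minimal sketch of the chat_with_ai handler whose signature and final return line appear in the hunks above. The body shown here is an assumption, not part of this commit: it fills in the elided middle lines using the legacy LangChain ConversationalRetrievalChain call convention (invoked with a "question" key; when memory is attached, the result carries an "answer" key).

def chat_with_ai(user_input, chat_history):
    # Assumed body: the diff only shows the signature and the final return.
    # With ConversationBufferMemory attached, the chain tracks prior turns
    # itself, so only the new question is passed in.
    result = conversational_chain({"question": user_input})
    chat_history.append((user_input, result["answer"]))
    return chat_history, ""

This matches the Gradio wiring in the diff: the function returns the updated history for the chatbot component and an empty string to clear the input textbox.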