anasmkh committed on
Commit
5a5b7c4
·
verified ·
1 Parent(s): 77e1a65

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -59
app.py CHANGED
@@ -1,22 +1,19 @@
1
- from langchain.chains import RetrievalQA
2
  from langchain.chat_models import ChatOpenAI
 
 
 
 
3
  from langchain.llms import OpenAI
4
  from langchain.memory import ConversationBufferMemory
5
- from langchain.chains import ConversationalRetrievalChain
6
- from langchain.text_splitter import CharacterTextSplitter
7
- from langchain.indexes import VectorstoreIndexCreator
8
- from langchain.document_loaders import PyPDFLoader
9
- from langchain.embeddings import OpenAIEmbeddings
10
- from langchain_core.vectorstores import InMemoryVectorStore
11
- from langchain.vectorstores import FAISS
12
- from langchain.retrievers import BM25Retriever,EnsembleRetriever
13
- from langchain_core.prompts import ChatPromptTemplate
14
- from langchain_core.output_parsers import StrOutputParser
15
- from langchain.schema.runnable import RunnablePassthrough
16
- import gradio as gr
17
  import os
 
18
 
19
- pdf_folder_path = "files"
 
 
 
20
 
21
  documents = []
22
  for filename in os.listdir(pdf_folder_path):
@@ -25,69 +22,51 @@ for filename in os.listdir(pdf_folder_path):
25
  loader = PyPDFLoader(file_path)
26
  documents.extend(loader.load())
27
 
28
- text_splitter = CharacterTextSplitter()
29
- text_splits=text_splitter.split_documents(documents)
30
-
31
-
32
- openai_api_key = os.getenv('OPENAI_API_KEY')
33
- openai_api_key = openai_api_key
34
 
35
  embeddings = OpenAIEmbeddings()
36
 
37
- vector_store = FAISS.from_documents(documents, embeddings)
38
 
39
- retriever_vectordb = vector_store.as_retriever(search_kwargs={"k": 5})
40
- keyword_retriever = BM25Retriever.from_documents(text_splits)
41
- keyword_retriever.k = 5
42
- ensemble_retriever = EnsembleRetriever(retrievers=[retriever_vectordb,keyword_retriever],
43
- weights=[0.5, 0.5])
44
 
 
45
 
 
46
 
47
- llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.4, api_key=openai_api_key)
48
 
49
- memory = ConversationBufferMemory(
50
- memory_key="chat_history",
51
- input_key="question" ,
52
- return_messages=True
 
 
 
53
  )
 
54
 
55
-
56
- conversation_chain = ConversationalRetrievalChain.from_llm(
57
- retriever=ensemble_retriever,
 
58
  llm=llm,
59
- memory=memory,
60
- verbose=False
61
  )
62
 
 
 
 
63
 
64
- template = """
65
- <|system|>>
66
- You are an AI Assistant that follows instructions extremely well.
67
- Please be truthful and give direct answers. Please tell 'I don't know' if user query is not in CONTEXT
68
-
69
- CONTEXT: {context}
70
- </s>
71
- <|user|>
72
- {query}
73
- </s>
74
- <|assistant|>
75
- """
76
-
77
- prompt = ChatPromptTemplate.from_template(template)
78
- output_parser = StrOutputParser()
79
-
80
- chain = (
81
- {"context": conversation_chain, "query": RunnablePassthrough()}
82
- | prompt
83
- | llm
84
- | output_parser
85
- )
86
 
87
 
88
 
89
  def chat_with_ai(user_input, chat_history):
90
- response = chain.invoke(user_input)
91
 
92
  chat_history.append((user_input, str(response)))
93
 
 
 
1
  from langchain.chat_models import ChatOpenAI
2
+ from langchain.embeddings.openai import OpenAIEmbeddings
3
+ from langchain.vectorstores import Qdrant
4
+ from langchain.text_splitter import CharacterTextSplitter
5
+ from langchain.chains import ConversationalRetrievalChain
6
  from langchain.llms import OpenAI
7
  from langchain.memory import ConversationBufferMemory
8
+ from qdrant_client import QdrantClient
9
+ from qdrant_client.http import models as qdrant_models
 
 
 
 
 
 
 
 
 
 
10
  import os
11
+ from langchain.document_loaders import PyPDFLoader
12
 
13
+ openai_api_key = os.getenv('OPENAI_API_KEY')
14
+ openai_api_key = openai_api_key
15
+
16
+ pdf_folder_path = "/content/new_files"
17
 
18
  documents = []
19
  for filename in os.listdir(pdf_folder_path):
 
22
  loader = PyPDFLoader(file_path)
23
  documents.extend(loader.load())
24
 
25
+ text_splitter = CharacterTextSplitter(chunk_size=512, chunk_overlap=25)
26
+ docs = text_splitter.split_documents(documents)
 
 
 
 
27
 
28
  embeddings = OpenAIEmbeddings()
29
 
30
+ qdrant_client = QdrantClient(":memory:")
31
 
32
+ qdrant_client.recreate_collection(
33
+ collection_name="langchain_collection",
34
+ vectors_config=qdrant_models.VectorParams(size=1536, distance=qdrant_models.Distance.COSINE)
35
+ )
 
36
 
37
+ from langchain_qdrant import QdrantVectorStore,FastEmbedSparse,RetrievalMode
38
 
39
+ sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")
40
 
 
41
 
42
+ vector_store = QdrantVectorStore.from_documents(
43
+ docs,
44
+ embedding=embeddings,
45
+ sparse_embedding=sparse_embeddings,
46
+ location=":memory:",
47
+ collection_name="langchain_collection",
48
+ retrieval_mode=RetrievalMode.HYBRID,
49
  )
50
+ memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
51
 
52
+ retriever = vector_store.as_retriever()
53
+ # llm = OpenAI(temperature=0.4)
54
+ llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.3)
55
+ conversational_chain = ConversationalRetrievalChain.from_llm(
56
  llm=llm,
57
+ retriever=retriever,
58
+ memory=memory
59
  )
60
 
61
+ query = "What is COMVIVA CDR"
62
+ response = conversational_chain.invoke({"question": query})
63
+ print(response['answer'])
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
 
67
 
68
  def chat_with_ai(user_input, chat_history):
69
+ response = conversational_chain.invoke({"question":user_input})
70
 
71
  chat_history.append((user_input, str(response)))
72