Spaces:
Sleeping
Sleeping
Commit
·
ef9e1ba
1
Parent(s):
443f232
Adding Langchain QA
Browse files
app.py
CHANGED
|
@@ -6,9 +6,10 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
| 6 |
from langchain.embeddings import SentenceTransformerEmbeddings
|
| 7 |
from langchain.vectorstores import FAISS
|
| 8 |
from langchain import HuggingFaceHub
|
|
|
|
|
|
|
| 9 |
|
| 10 |
-
|
| 11 |
-
DEVICE = 'cpu '
|
| 12 |
FILE_EXT = ['pdf','text','csv','word','wav']
|
| 13 |
|
| 14 |
|
|
@@ -36,14 +37,14 @@ def get_hugging_face_model(model_id,API_key,temperature=0.1):
|
|
| 36 |
model_kwargs={"temperature": temperature, "max_new_tokens": 2048})
|
| 37 |
return chat_llm
|
| 38 |
|
| 39 |
-
def chat_application(
|
| 40 |
if llm_model == 'HuggingFace':
|
| 41 |
llm = get_hugging_face_model(model_id='tiiuae/falcon-7b-instruct',API_key=key)
|
| 42 |
else:
|
| 43 |
llm_model = get_openai_chat_model(API_key=key)
|
| 44 |
|
| 45 |
|
| 46 |
-
def document_loader(file_data,doc_type='pdf',key=None):
|
| 47 |
embedding_model = SentenceTransformerEmbeddings(model_name='all-mpnet-base-v2',model_kwargs={"device": DEVICE})
|
| 48 |
document = None
|
| 49 |
if doc_type == 'pdf':
|
|
@@ -54,15 +55,21 @@ def document_loader(file_data,doc_type='pdf',key=None):
|
|
| 54 |
document = process_csv_document(document_file_name=file_data)
|
| 55 |
elif doc_type == 'word':
|
| 56 |
document = process_word_document(document_file_name=file_data)
|
| 57 |
-
|
| 58 |
if document:
|
| 59 |
texts = process_documents(documents=document)
|
| 60 |
-
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
else:
|
| 63 |
return "Error in loading Documents "
|
| 64 |
|
| 65 |
-
return "
|
| 66 |
|
| 67 |
|
| 68 |
def process_text_document(document_file_name):
|
|
@@ -125,6 +132,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 125 |
submit_button = gr.Button("Send Message")
|
| 126 |
load_pdf.click(loading_file, None, langchain_status, queue=False)
|
| 127 |
load_pdf.click(document_loader, inputs=[pdf_doc,file_extension,API_key], outputs=[langchain_status], queue=False)
|
|
|
|
| 128 |
# question.submit(add_text, [chatbot, question], [chatbot, question]).then(
|
| 129 |
# bot, chatbot, chatbot
|
| 130 |
# )
|
|
|
|
| 6 |
from langchain.embeddings import SentenceTransformerEmbeddings
|
| 7 |
from langchain.vectorstores import FAISS
|
| 8 |
from langchain import HuggingFaceHub
|
| 9 |
+
from langchain.chains import RetrievalQA
|
| 10 |
+
from langchain.prompts import PromptTemplate
|
| 11 |
|
| 12 |
+
DEVICE = 'cpu'
|
|
|
|
| 13 |
FILE_EXT = ['pdf','text','csv','word','wav']
|
| 14 |
|
| 15 |
|
|
|
|
| 37 |
model_kwargs={"temperature": temperature, "max_new_tokens": 2048})
|
| 38 |
return chat_llm
|
| 39 |
|
| 40 |
+
def chat_application(llm_service,key):
    """Return the chat LLM for the requested backend.

    Args:
        llm_service: Backend name. Any capitalisation of 'HuggingFace'
            selects the Hugging Face Hub model; every other value falls
            back to the OpenAI chat model.
        key: API key forwarded to the selected backend.

    Returns:
        Whatever get_hugging_face_model or get_openai_chat_model returns
        for the given key.
    """
    # Compare case-insensitively: document_loader's default is spelled
    # 'Huggingface', which an exact match against 'HuggingFace' would miss.
    if str(llm_service).lower() == 'huggingface':
        llm = get_hugging_face_model(model_id='tiiuae/falcon-7b-instruct',API_key=key)
    else:
        llm = get_openai_chat_model(API_key=key)
    # The caller passes this call's result straight to RetrievalQA as llm=,
    # so the model has to be returned rather than left in a local.
    return llm
|
| 45 |
|
| 46 |
|
| 47 |
+
def document_loader(file_data,api_key,doc_type='pdf',llm='Huggingface'):
|
| 48 |
embedding_model = SentenceTransformerEmbeddings(model_name='all-mpnet-base-v2',model_kwargs={"device": DEVICE})
|
| 49 |
document = None
|
| 50 |
if doc_type == 'pdf':
|
|
|
|
| 55 |
document = process_csv_document(document_file_name=file_data)
|
| 56 |
elif doc_type == 'word':
|
| 57 |
document = process_word_document(document_file_name=file_data)
|
| 58 |
+
print(document)
|
| 59 |
if document:
|
| 60 |
texts = process_documents(documents=document)
|
| 61 |
+
vector_db = FAISS.from_documents(documents=texts, embedding= embedding_model)
|
| 62 |
+
global qa
|
| 63 |
+
qa = RetrievalQA.from_chain_type(llm=chat_application(llm_service=llm,key=api_key),
|
| 64 |
+
chain_type='stuff',
|
| 65 |
+
retriever=vector_db.as_retriever(),
|
| 66 |
+
# chain_type_kwargs=chain_type_kwargs,
|
| 67 |
+
return_source_documents=True
|
| 68 |
+
)
|
| 69 |
else:
|
| 70 |
return "Error in loading Documents "
|
| 71 |
|
| 72 |
+
return "Ready..."
|
| 73 |
|
| 74 |
|
| 75 |
def process_text_document(document_file_name):
|
|
|
|
| 132 |
submit_button = gr.Button("Send Message")
|
| 133 |
load_pdf.click(loading_file, None, langchain_status, queue=False)
|
| 134 |
# Inputs are passed positionally, so their order must follow
# document_loader(file_data, api_key, doc_type, ...): API key before file type.
load_pdf.click(document_loader, inputs=[pdf_doc,API_key,file_extension], outputs=[langchain_status], queue=False)
|
| 135 |
+
|
| 136 |
# question.submit(add_text, [chatbot, question], [chatbot, question]).then(
|
| 137 |
# bot, chatbot, chatbot
|
| 138 |
# )
|