Spaces:

Samarth991
/

LLM-Chatbot

Sleeping

Samarth991 committed on Sep 10, 2023

Commit

1c52547

1 Parent(s): 5a2a128

adding online PDF loader

Files changed (1) hide show

app.py CHANGED Viewed

@@ -48,13 +48,13 @@ def document_loader(file_data,api_key,doc_type='pdf',llm='Huggingface'):
     embedding_model = SentenceTransformerEmbeddings(model_name='all-mpnet-base-v2',model_kwargs={"device": DEVICE})
     document = None
     if doc_type == 'pdf':
-        document = process_pdf_document(document_file_name=file_data)
     elif doc_type == 'text':
-        document = process_text_document(document_file_name=file_data)
     elif doc_type == 'csv':
-        document = process_csv_document(document_file_name=file_data)
     elif doc_type == 'word':
-        document = process_word_document(document_file_name=file_data)
     if document:
         texts = process_documents(documents=document)
         vector_db = FAISS.from_documents(documents=texts, embedding= embedding_model)
@@ -71,20 +71,20 @@ def document_loader(file_data,api_key,doc_type='pdf',llm='Huggingface'):
     return "Ready..."
-def process_text_document(document_file_name):
-    loader = TextLoader(document_file_name)
     document = loader.load()
     return document
-def process_csv_document(document_file_name):
-    loader = CSVLoader(file_path=document_file_name)
     document = loader.load()
     return document
-def process_word_document(document_file_name):
-    loader = UnstructuredWordDocumentLoader(file_path=document_file_name)
     document = loader.load()
     return document

     embedding_model = SentenceTransformerEmbeddings(model_name='all-mpnet-base-v2',model_kwargs={"device": DEVICE})
     document = None
     if doc_type == 'pdf':
+        document = process_pdf_document(document_file=file_data)
     elif doc_type == 'text':
+        document = process_text_document(document_file=file_data)
     elif doc_type == 'csv':
+        document = process_csv_document(document_file=file_data)
     elif doc_type == 'word':
+        document = process_word_document(document_file=file_data)
     if document:
         texts = process_documents(documents=document)
         vector_db = FAISS.from_documents(documents=texts, embedding= embedding_model)
     return "Ready..."
+def process_text_document(document_file):
+    loader = TextLoader(document_file.name)
     document = loader.load()
     return document
+def process_csv_document(document_file):
+    loader = CSVLoader(file_path=document_file.name)
     document = loader.load()
     return document
+def process_word_document(document_file):
+    loader = UnstructuredWordDocumentLoader(file_path=document_file.name)
     document = loader.load()
     return document