Spaces:

PabloVD
/

CAMELSDocBot

Sleeping

PabloVD committed on Oct 30, 2024

Commit

127f3c4

1 Parent(s): fdb4410

fixes

Files changed (3) hide show

app.py CHANGED Viewed

@@ -10,7 +10,7 @@ url = 'https://camels.readthedocs.io/_/downloads/en/latest/pdf/'
 r = requests.get(url, stream=True)
 document_path = Path('metadata.pdf')
 document_path.write_bytes(r.content)
 worker.process_document(document_path)
 def handle_prompt(message, history):

 r = requests.get(url, stream=True)
 document_path = Path('metadata.pdf')
 document_path.write_bytes(r.content)
+# document_path="2022GS.pdf"
 worker.process_document(document_path)
 def handle_prompt(message, history):

requirements.txt CHANGED Viewed

@@ -1,4 +1,9 @@
 langchain
 langchain-community
 langchain-huggingface
 chromadb

+pdf2image
+pypdf
+tiktoken
 langchain
 langchain-community
 langchain-huggingface
 chromadb
+InstructorEmbedding
+huggingface_hub==0.25.2

worker.py CHANGED Viewed

@@ -5,20 +5,20 @@ from langchain_community.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.vectorstores import Chroma
 from langchain_huggingface import HuggingFaceEndpoint
-from sentence_transformers import SentenceTransformer # Use SentenceTransformer module to use Hugging face Model
-import pip
-def install(package):
-    if hasattr(pip, 'main'):
-        pip.main(['install', package])
-    else:
-        pip._internal.main(['install', package])
-# Temporal fix for incompatibility between langchain_huggingface and sentence-transformers<2.6
 # install("sentence-transformers==2.2.2")
 # Check for GPU availability and set the appropriate device for computation.
 DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
 # Global variables
 conversation_retrieval_chain = None
@@ -49,11 +49,9 @@ def init_llm():
     #Initialize embeddings using a pre-trained model to represent the text data.
     embedddings_model = "sentence-transformers/multi-qa-distilbert-cos-v1"
     # embedddings_model = "sentence-transformers/all-MiniLM-L6-v2"
-    emb_model = SentenceTransformer(embedddings_model)
     embeddings = HuggingFaceInstructEmbeddings(
-        model_name=emb_model,
         model_kwargs={"device": DEVICE}
     )

 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.vectorstores import Chroma
 from langchain_huggingface import HuggingFaceEndpoint
+# import pip
+# def install(package):
+#     if hasattr(pip, 'main'):
+#         pip.main(['install', package])
+#     else:
+#         pip._internal.main(['install', package])
+# # Temporary fix for incompatibility between langchain_huggingface and sentence-transformers<2.6
 # install("sentence-transformers==2.2.2")
 # Check for GPU availability and set the appropriate device for computation.
 DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
+# DEVICE = "cpu"
 # Global variables
 conversation_retrieval_chain = None
     #Initialize embeddings using a pre-trained model to represent the text data.
     embedddings_model = "sentence-transformers/multi-qa-distilbert-cos-v1"
     # embedddings_model = "sentence-transformers/all-MiniLM-L6-v2"
     embeddings = HuggingFaceInstructEmbeddings(
+        model_name=embedddings_model,
         model_kwargs={"device": DEVICE}
     )