back to the huggingface API embeddings, remove non-usable embeddings

- document_qa/document_qa_engine.py +1 -1
- streamlit_app.py +7 -7
document_qa/document_qa_engine.py CHANGED

@@ -423,7 +423,7 @@ class DocumentQAEngine:
         if doc_id:
             hash = doc_id
         else:
-            hash = metadata[0]['hash']
+            hash = metadata[0]['hash'] if len(metadata) > 0 and 'hash' in metadata[0] else ""
 
         self.data_storage.embed_document(hash, texts, metadata)
 
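For context, the one-line change above makes the hash lookup defensive: metadata[0]['hash'] raised IndexError when the metadata list was empty and KeyError when the first entry had no 'hash' key. A minimal sketch of the new behaviour, written as a hypothetical standalone helper rather than the engine's actual method:

# Hypothetical illustration of the guarded lookup; resolve_hash is not a function in the repository.
def resolve_hash(doc_id, metadata):
    """Prefer an explicit doc_id; otherwise fall back to the first chunk's hash, if any."""
    if doc_id:
        return doc_id
    # Old behaviour: metadata[0]['hash'] could raise IndexError (empty list) or KeyError (missing key).
    return metadata[0]['hash'] if len(metadata) > 0 and 'hash' in metadata[0] else ""

assert resolve_hash("abc123", []) == "abc123"
assert resolve_hash(None, [{'hash': 'deadbeef'}]) == "deadbeef"
assert resolve_hash(None, []) == ""              # previously raised IndexError
assert resolve_hash(None, [{'page': 1}]) == ""   # previously raised KeyError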
streamlit_app.py CHANGED

@@ -6,7 +6,7 @@ from tempfile import NamedTemporaryFile
 import dotenv
 from grobid_quantities.quantities import QuantitiesAPI
 from langchain.memory import ConversationBufferMemory
-from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpointEmbeddings
 from langchain_openai import ChatOpenAI
 from streamlit_pdf_viewer import pdf_viewer
 
@@ -23,9 +23,7 @@ API_MODELS = {
 }
 
 API_EMBEDDINGS = {
-    'intfloat/e5-large-
-    'intfloat/multilingual-e5-large-instruct': 'intfloat/multilingual-e5-large-instruct:',
-    'Salesforce/SFR-Embedding-2_R': 'Salesforce/SFR-Embedding-2_R'
+    'intfloat/multilingual-e5-large-instruct': 'intfloat/multilingual-e5-large-instruct'
 }
 
 if 'rqa' not in st.session_state:
@@ -135,8 +133,9 @@ def init_qa(model_name, embeddings_name):
         api_key=os.environ.get('API_KEY')
     )
 
-    embeddings =
-
+    embeddings = HuggingFaceEndpointEmbeddings(
+        repo_id=API_EMBEDDINGS[embeddings_name]
+    )
 
    storage = DataStorage(embeddings)
    return DocumentQAEngine(chat, storage, grobid_url=os.environ['GROBID_URL'], memory=st.session_state['memory'])
@@ -320,7 +319,8 @@ if uploaded_file and not st.session_state.loaded_embeddings:
        st.session_state['doc_id'] = hash = st.session_state['rqa'][model].create_memory_embeddings(
            tmp_file.name,
            chunk_size=chunk_size,
-           perc_overlap=0.1
+           perc_overlap=0.1
+       )
        st.session_state['loaded_embeddings'] = True
        st.session_state.messages = []
 
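The init_qa change above replaces the locally loaded HuggingFaceEmbeddings with HuggingFaceEndpointEmbeddings, so vectors are computed through the Hugging Face Inference API instead of a model downloaded into the Space. A minimal usage sketch, assuming the standard langchain-huggingface setup where API credentials are read from the environment (the token variable and the sample texts are illustrative, not taken from the repository):

# Sketch: embed text remotely via the HF Inference API. Assumes a valid token is available in the
# environment (e.g. HUGGINGFACEHUB_API_TOKEN, as expected by langchain-huggingface).
from langchain_huggingface import HuggingFaceEndpointEmbeddings

API_EMBEDDINGS = {
    'intfloat/multilingual-e5-large-instruct': 'intfloat/multilingual-e5-large-instruct'
}

embeddings = HuggingFaceEndpointEmbeddings(
    repo_id=API_EMBEDDINGS['intfloat/multilingual-e5-large-instruct']  # same construction as in init_qa
)

# HuggingFaceEndpointEmbeddings implements the standard LangChain Embeddings interface.
query_vector = embeddings.embed_query("What quantities does this paper report?")
doc_vectors = embeddings.embed_documents(["First chunk of text.", "Second chunk of text."])
print(len(query_vector), len(doc_vectors))  # embedding dimensionality, number of embedded chunks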