fix auth bug with a new embedding endpoint
- app.py +5 -4
- spinoza_project/source/backend/llm_utils.py +20 -1
app.py
CHANGED
@@ -7,6 +7,7 @@ from spinoza_project.source.backend.llm_utils import (
     get_llm,
     get_llm_api,
     get_vectorstore,
+    get_vectorstore_api,
 )
 from spinoza_project.source.backend.document_store import pickle_to_document_store
 from spinoza_project.source.backend.get_prompts import get_qa_prompts
@@ -46,6 +47,7 @@ llm = get_llm_api()
 
 ## Loading_tools
 print("Loading Databases")
+bdd_presse = get_vectorstore_api("presse")
 qdrants = {
     tab: pickle_to_document_store(
         hf_hub_download(
@@ -204,7 +206,7 @@ def answer_questions(*questions_sources, config=config):
     ]
 
 
-def get_sources(questions, bdd_presse, qdrants=qdrants, config=config):
+def get_sources(questions, qdrants=qdrants, bdd_presse=bdd_presse, config=config):
     k = config["num_document_retrieved"]
     min_similarity = config["min_similarity"]
     formated = []
@@ -258,9 +260,8 @@ def get_sources(questions, bdd_presse, qdrants=qdrants, config=config):
     return formated, text
 
 
-def retrieve_sources(*questions, qdrants=qdrants, config=config):
-
-    formated_sources, text_sources = get_sources(questions, bdd_presse, qdrants, config)
+def retrieve_sources(*questions, qdrants=qdrants, bdd_presse=bdd_presse, config=config):
+    formated_sources, text_sources = get_sources(questions, qdrants, bdd_presse, config)
 
     return (formated_sources, *text_sources)
 
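The parameter reorder above matters because retrieve_sources hands its arguments to get_sources positionally, so the call order has to match the new signature. A minimal sketch of the equivalence; the keyword form is an illustration of intent, not code from this commit:

# Positional call as written in the new retrieve_sources; the order must match
# the new signature (questions, qdrants, bdd_presse, config).
formated_sources, text_sources = get_sources(questions, qdrants, bdd_presse, config)

# Equivalent keyword form (hypothetical, not in the commit); spelled out this
# way it would have been valid against either parameter order.
formated_sources, text_sources = get_sources(
    questions, qdrants=qdrants, bdd_presse=bdd_presse, config=config
)

Binding bdd_presse as a module-level keyword default also means callers that pass only questions keep working, since the store is created once at import time.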
spinoza_project/source/backend/llm_utils.py
CHANGED
@@ -1,4 +1,3 @@
-from tracemalloc import stop
 from langchain_openai import AzureChatOpenAI
 from msal import ConfidentialClientApplication
 from langchain_openai import AzureOpenAIEmbeddings
@@ -97,3 +96,23 @@ def get_vectorstore(index_name, model="text-embedding-ada-002"):
     )
 
     return vector_store
+
+
+def get_vectorstore_api(index_name):
+    aoai_embeddings = AzureOpenAIEmbeddings(
+        model="text-embedding-ada-002",
+        api_key=os.getenv("OPENAI_API_KEY"),
+        azure_endpoint=os.environ["AZURE_ENDPOINT_API"],
+        openai_api_version=os.getenv("OPENAI_API_VERSION"),
+    )
+
+    os.environ["AZURE_OPENAI_API_KEY"] = get_token()
+
+    vector_store: AzureSearch = AzureSearch(
+        azure_search_endpoint=os.getenv("VECTOR_STORE_ADDRESS"),
+        azure_search_key=os.getenv("VECTOR_STORE_PASSWORD"),
+        index_name=index_name,
+        embedding_function=aoai_embeddings.embed_query,
+    )
+
+    return vector_store
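For reference, a minimal smoke-test sketch of the new helper. It assumes the environment variables named in the diff (OPENAI_API_KEY, OPENAI_API_VERSION, AZURE_ENDPOINT_API, VECTOR_STORE_ADDRESS, VECTOR_STORE_PASSWORD) are set, and that os, get_token and AzureSearch are already imported in llm_utils.py, since these hunks do not show them; the query text and k are illustrative:

from spinoza_project.source.backend.llm_utils import get_vectorstore_api

# Build the Azure AI Search-backed store for the "presse" index, as app.py now does.
bdd_presse = get_vectorstore_api("presse")

# LangChain's AzureSearch wrapper exposes similarity_search; k=4 is illustrative.
docs = bdd_presse.similarity_search("liberté de la presse", k=4)
for doc in docs:
    print(doc.page_content[:80])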