Spaces: Running
Update app.py
app.py CHANGED
@@ -5,9 +5,14 @@ from sentence_transformers import SentenceTransformer
 from transformers import AutoTokenizer, AutoModelForMaskedLM
 from qdrant_client import models
 import logging
+import json
 
 # --- Setup Logging ---
-logging
+# Configure logging to be more descriptive
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
+)
 logger = logging.getLogger(__name__)
 
 # --- Configuration ---
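For context: the format string added above stamps every record with a timestamp and level. A quick standard-library check of what the new log lines will look like (nothing here is specific to this Space):

import logging

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logging.getLogger(__name__).info("models loaded")
# -> 2024-05-01 12:00:00,123 - INFO - models loaded  (timestamp will vary)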
@@ -53,14 +58,15 @@ async def load_models():
     This ensures models are loaded only once.
     """
     global dense_model, splade_tokenizer, splade_model
-    logger.info(
+    logger.info("Server is starting up... Time to load the ML models.")
+    logger.info(f"I'll be using the '{DEVICE}' for processing.")
     try:
         dense_model = SentenceTransformer(DENSE_MODEL_ID, device=DEVICE)
         splade_tokenizer = AutoTokenizer.from_pretrained(SPLADE_QUERY_MODEL_ID)
         splade_model = AutoModelForMaskedLM.from_pretrained(SPLADE_QUERY_MODEL_ID).to(DEVICE)
-        logger.info("
+        logger.info("Great news! All models have been loaded successfully.")
     except Exception as e:
-        logger.
+        logger.critical(f"Oh no, a critical error occurred while loading models: {e}", exc_info=True)
         # In a real-world scenario, you might want the app to fail startup if models don't load.
         raise e
 
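The hunk above shows only the body of load_models(); how it is registered, and the configuration values, sit outside this diff. A minimal sketch of the usual pattern, assuming a FastAPI startup hook and placeholder model IDs (the Space's real DENSE_MODEL_ID, SPLADE_QUERY_MODEL_ID, and DEVICE live in the Configuration block this diff does not show):

import logging

import torch
from fastapi import FastAPI
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForMaskedLM

logger = logging.getLogger(__name__)

app = FastAPI()

# Placeholder values; the actual IDs are defined in app.py's Configuration block.
DENSE_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"
SPLADE_QUERY_MODEL_ID = "naver/efficient-splade-VI-BT-large-query"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Module-level globals, populated exactly once when the server starts.
dense_model = None
splade_tokenizer = None
splade_model = None

@app.on_event("startup")
async def load_models():
    """Load all models once, at process start, into the globals above."""
    global dense_model, splade_tokenizer, splade_model
    dense_model = SentenceTransformer(DENSE_MODEL_ID, device=DEVICE)
    splade_tokenizer = AutoTokenizer.from_pretrained(SPLADE_QUERY_MODEL_ID)
    splade_model = AutoModelForMaskedLM.from_pretrained(SPLADE_QUERY_MODEL_ID).to(DEVICE)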
@@ -107,27 +113,36 @@ async def vectorize_query(request: QueryRequest):
     Returns:
         A JSON response containing the dense and sparse vectors.
     """
-
-
+    # --- n8n Logging ---
+    logger.info("=========================================================")
+    logger.info("A new request just arrived! Let's see what we've got.")
+    logger.info(f"The incoming search query from n8n is: '{request.query_text}'")
+
     # 1. Generate Dense Vector
-    logger.info("
+    logger.info("First, I'm generating the dense vector for semantic meaning...")
     dense_query_vector = dense_model.encode(request.query_text).tolist()
-    logger.info("
+    logger.info("Done with the dense vector. It has %d dimensions.", len(dense_query_vector))
+    logger.info("Here's a small sample of the dense vector: %s...", str(dense_query_vector[:4]))
 
     # 2. Generate Sparse Vector
-    logger.info("
+    logger.info("Next up, creating the sparse vector for keyword matching...")
     sparse_query_vector = compute_splade_vector(request.query_text)
-    logger.info("Sparse vector
+    logger.info("Sparse vector is ready. It contains %d important terms.", len(sparse_query_vector.indices))
+    logger.info("Here's a sample of the sparse vector indices: %s...", str(sparse_query_vector.indices[:4]))
 
     # 3. Construct and return the response
-    return VectorResponse(
+    logger.info("Everything looks good. I'm packaging up the vectors to send back.")
+    logger.info("=========================================================")
+
+    final_response = VectorResponse(
         dense_vector=dense_query_vector,
         sparse_vector=SparseVectorResponse(
             indices=sparse_query_vector.indices,
             values=sparse_query_vector.values
         )
     )
+    return final_response
 
 @app.get("/", include_in_schema=False)
 async def root():
-    return {"message": "Vector Generation API is running. -- VERSION 2 --"}
+    return {"message": "Vector Generation API is running. -- VERSION 2 --"}
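compute_splade_vector() is called in the hunk above but not defined anywhere in this diff. If it follows the standard SPLADE query-expansion recipe (a log-saturated ReLU over the masked-LM logits, max-pooled across tokens), a rough sketch would look like the following. It reuses the splade_tokenizer, splade_model, and DEVICE globals from the startup sketch earlier, and returns the qdrant_client models.SparseVector that the handler's .indices/.values access implies:

import torch
from qdrant_client import models

def compute_splade_vector(text: str) -> models.SparseVector:
    """Sketch of a SPLADE query encoder: max over tokens of log(1 + ReLU(logits))."""
    # Assumes splade_tokenizer, splade_model, DEVICE are the globals loaded at startup.
    tokens = splade_tokenizer(text, return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        logits = splade_model(**tokens).logits        # shape: (1, seq_len, vocab)
    # SPLADE activation: log-saturated ReLU, masked max-pool over the sequence.
    weights = torch.log1p(torch.relu(logits))
    weights = weights * tokens.attention_mask.unsqueeze(-1)
    scores = weights.max(dim=1).values.squeeze(0)     # shape: (vocab,)
    nonzero = scores.nonzero(as_tuple=True)[0]
    return models.SparseVector(
        indices=nonzero.tolist(),
        values=scores[nonzero].tolist(),
    )

The log1p(relu(...)) activation keeps the weights non-negative and mostly zero, which is what lets the result be stored compactly as index/value pairs.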
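Finally, a hypothetical smoke test for the endpoint. The route path and port are guesses (the @app.post decorator for vectorize_query is outside this diff; 7860 is the usual Spaces port), but the query_text field and the response shape match the models used above:

import requests

resp = requests.post(
    "http://localhost:7860/vectorize",   # path is a guess; not shown in this diff
    json={"query_text": "hybrid search with qdrant"},
)
resp.raise_for_status()
body = resp.json()
# Expect a dense vector plus sparse indices/values, per VectorResponse above.
print(len(body["dense_vector"]), len(body["sparse_vector"]["indices"]))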