Justin44 committed on
Commit 05f672b · verified · 1 Parent(s): ef355d5

Update app.py

Files changed (1)
  app.py  +27 -12
app.py CHANGED
@@ -5,9 +5,14 @@ from sentence_transformers import SentenceTransformer
 from transformers import AutoTokenizer, AutoModelForMaskedLM
 from qdrant_client import models
 import logging
+import json
 
 # --- Setup Logging ---
-logging.basicConfig(level=logging.INFO)
+# Configure logging to be more descriptive
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
+)
 logger = logging.getLogger(__name__)
 
 # --- Configuration ---
@@ -53,14 +58,15 @@ async def load_models():
     This ensures models are loaded only once.
     """
     global dense_model, splade_tokenizer, splade_model
-    logger.info(f"Loading models onto device: {DEVICE}")
+    logger.info("Server is starting up... Time to load the ML models.")
+    logger.info(f"I'll be using the '{DEVICE}' for processing.")
     try:
         dense_model = SentenceTransformer(DENSE_MODEL_ID, device=DEVICE)
         splade_tokenizer = AutoTokenizer.from_pretrained(SPLADE_QUERY_MODEL_ID)
         splade_model = AutoModelForMaskedLM.from_pretrained(SPLADE_QUERY_MODEL_ID).to(DEVICE)
-        logger.info("Models initialized successfully.")
+        logger.info("Great news! All models have been loaded successfully.")
     except Exception as e:
-        logger.fatal(f"FATAL: Could not initialize models. Error: {e}")
+        logger.critical(f"Oh no, a critical error occurred while loading models: {e}", exc_info=True)
         # In a real-world scenario, you might want the app to fail startup if models don't load.
         raise e
 
@@ -107,27 +113,36 @@ async def vectorize_query(request: QueryRequest):
     Returns:
         A JSON response containing the dense and sparse vectors.
     """
-    logger.info(f"Received query for vectorization: '{request.query_text}'")
-
+    # --- n8n Logging ---
+    logger.info("=========================================================")
+    logger.info("A new request just arrived! Let's see what we've got.")
+    logger.info(f"The incoming search query from n8n is: '{request.query_text}'")
+
     # 1. Generate Dense Vector
-    logger.info("Generating dense vector...")
+    logger.info("First, I'm generating the dense vector for semantic meaning...")
     dense_query_vector = dense_model.encode(request.query_text).tolist()
-    logger.info("Dense vector generated.")
+    logger.info("Done with the dense vector. It has %d dimensions.", len(dense_query_vector))
+    logger.info("Here's a small sample of the dense vector: %s...", str(dense_query_vector[:4]))
 
     # 2. Generate Sparse Vector
-    logger.info("Generating sparse vector...")
+    logger.info("Next up, creating the sparse vector for keyword matching...")
     sparse_query_vector = compute_splade_vector(request.query_text)
-    logger.info("Sparse vector generated.")
+    logger.info("Sparse vector is ready. It contains %d important terms.", len(sparse_query_vector.indices))
+    logger.info("Here's a sample of the sparse vector indices: %s...", str(sparse_query_vector.indices[:4]))
 
     # 3. Construct and return the response
-    return VectorResponse(
+    logger.info("Everything looks good. I'm packaging up the vectors to send back.")
+    logger.info("=========================================================")
+
+    final_response = VectorResponse(
         dense_vector=dense_query_vector,
         sparse_vector=SparseVectorResponse(
             indices=sparse_query_vector.indices,
             values=sparse_query_vector.values
         )
     )
+    return final_response
 
 @app.get("/", include_in_schema=False)
 async def root():
-    return {"message": "Vector Generation API is running. -- VERSION 2 --"}
+    return {"message": "Vector Generation API is running. -- VERSION 2 --"}
 
 
 