Update app.py
Browse files
app.py
CHANGED
@@ -300,20 +300,23 @@ def get_movie_embeddings(conn):
|
|
300 |
|
301 |
def rerank_results(query, results):
    """Re-rank search results for a query with the reranker model.

    Args:
        query: Search query text; paired with each movie description.
        results: Sequence of ``(movie_id, score)`` pairs. Only the movie id
            is used; the incoming score is ignored.

    Returns:
        A list of ``(movie_id, reranker_score)`` tuples sorted by score in
        descending order. Ids that are not present in ``movies_data`` are
        omitted. An empty list is returned when there is nothing to rank.
    """
    # Nothing to rank: avoid calling the tokenizer/model on an empty batch.
    if not results:
        return []

    # Index movies by id once instead of scanning movies_data per result.
    # setdefault keeps the first entry for a duplicated id, matching the
    # first-match behavior of a linear search.
    movies_by_id = {}
    for candidate in movies_data:
        movies_by_id.setdefault(candidate['id'], candidate)

    pairs = []
    movie_ids = []
    for movie_id, _ in results:
        movie = movies_by_id.get(movie_id)
        if movie:
            movie_info = f"Название: {movie['name']}\nГод: {movie['year']}\nЖанры: {movie['genreslist']}\nОписание: {movie['description']}"
            pairs.append([query, movie_info])
            movie_ids.append(movie_id)

    # None of the ids were found in movies_data: same empty-batch guard.
    if not pairs:
        return []

    with torch.no_grad():
        inputs = reranker_tokenizer(pairs, padding=True, truncation=True, return_tensors='pt', max_length=512)
        # Flatten the logits to one score per (query, movie) pair.
        scores = reranker_model(**inputs, return_dict=True).logits.view(-1).float()

    reranked_results = sorted(zip(movie_ids, scores.tolist()), key=lambda x: x[1], reverse=True)
    return reranked_results
|
318 |
|
319 |
def search_movies(query, top_k=20):
|
@@ -359,7 +362,7 @@ def search_movies(query, top_k=20):
|
|
359 |
FROM {embeddings_table} m, query_embedding
|
360 |
ORDER BY similarity DESC
|
361 |
LIMIT %s
|
362 |
-
""", (query_crc32, top_k *
|
363 |
|
364 |
results = cur.fetchall()
|
365 |
logging.info(f"Найдено {len(results)} предварительных результатов поиска.")
|
|
|
300 |
|
301 |
def rerank_results(query, results):
    """Re-rank search results for a query with the reranker model.

    Args:
        query: Search query text; paired with each movie description.
        results: Sequence of ``(movie_id, score)`` pairs. Only the movie id
            is used; the incoming score is ignored.

    Returns:
        A list of ``(movie_id, reranker_score)`` tuples sorted by score in
        descending order. Ids that are not present in ``movies_data`` are
        omitted. An empty list is returned when there is nothing to rank.
    """
    logging.info(f"Начало переранжирования для запроса: '{query}'")

    # Nothing to rank: avoid calling the tokenizer/model on an empty batch.
    if not results:
        logging.info("Переранжирование завершено.")
        return []

    # Index movies by id once instead of scanning movies_data per result.
    # setdefault keeps the first entry for a duplicated id, matching the
    # first-match behavior of a linear search.
    movies_by_id = {}
    for candidate in movies_data:
        movies_by_id.setdefault(candidate['id'], candidate)

    total = len(results)
    pairs = []
    movie_ids = []
    for i, (movie_id, _) in enumerate(results):
        movie = movies_by_id.get(movie_id)
        if movie:
            movie_info = f"Название: {movie['name']}\nГод: {movie['year']}\nЖанры: {movie['genreslist']}\nОписание: {movie['description']}"
            pairs.append([query, movie_info])
            movie_ids.append(movie_id)
            logging.info(f"Обработка фильма для реранка {i+1}/{total}: {movie['name']}")

    # None of the ids were found in movies_data: same empty-batch guard.
    if not pairs:
        logging.info("Переранжирование завершено.")
        return []

    with torch.no_grad():
        inputs = reranker_tokenizer(pairs, padding=True, truncation=True, return_tensors='pt', max_length=512)
        # Flatten the logits to one score per (query, movie) pair.
        scores = reranker_model(**inputs, return_dict=True).logits.view(-1).float()

    reranked_results = sorted(zip(movie_ids, scores.tolist()), key=lambda x: x[1], reverse=True)
    logging.info("Переранжирование завершено.")
    return reranked_results
|
321 |
|
322 |
def search_movies(query, top_k=20):
|
|
|
362 |
FROM {embeddings_table} m, query_embedding
|
363 |
ORDER BY similarity DESC
|
364 |
LIMIT %s
|
365 |
+
""", (query_crc32, int(top_k * 1))) # Candidate limit multiplier is 1: fetch exactly top_k rows before reranking
|
366 |
|
367 |
results = cur.fetchall()
|
368 |
logging.info(f"Найдено {len(results)} предварительных результатов поиска.")
|