muryshev's picture
init
57cf043
raw
history blame
1.13 kB
import pandas as pd
from common.configuration import Configuration
class DocumentRanking:
    """Re-rank search candidates by title distance and text length.

    The ranker holds a document frame plus two weights, ``alpha`` and
    ``beta``, both read from ``config.db_config.ranker``.
    """

    def __init__(self, df: pd.DataFrame, config: Configuration):
        """Keep references to the frame and config and cache the weights.

        Args:
            df: Document frame. ``doc_ranking`` reads its ``TitleEmbedding``
                and ``Text`` columns by positional row index.
            config: Configuration object exposing
                ``db_config.ranker.alpha``, ``db_config.ranker.beta`` and
                ``db_config.search.vector_search.k_neighbors``.
        """
        self.df = df
        self.config = config
        ranker_cfg = config.db_config.ranker
        self.alpha = ranker_cfg.alpha
        self.beta = ranker_cfg.beta

    def doc_ranking(self, query_embedding, scores, indexes):
        """Reorder candidate rows by a combined score and keep the best ones.

        The combined score of the candidate at position ``i`` is::

            scores[i] * len(text_i) ** beta + alpha * dist_i

        where ``dist_i`` is the dot product of
        ``title_embedding_i - query_embedding`` with itself (the squared
        Euclidean distance when the embeddings are 1-D vectors). Candidates
        are sorted by ascending combined score.

        Args:
            query_embedding: Query vector. It must support subtraction from
                a stored title embedding, and the result must provide
                ``.dot`` (presumably NumPy arrays; confirm against caller).
            scores: Per-candidate scores, indexed by position in ``indexes``.
            indexes: Positional row indexes into ``self.df``.

        Returns:
            The values of ``indexes`` in ascending combined-score order,
            truncated to ``config.db_config.search.vector_search.k_neighbors``
            entries.
        """
        candidates = self.df.iloc[indexes]

        # Distance term: (title - query) . (title - query) per candidate.
        offsets = (
            title_vec - query_embedding
            for title_vec in candidates['TitleEmbedding'].to_list()
        )
        distances = [offset.dot(offset) for offset in offsets]

        # `**` binds tighter than `*`: the score is scaled by len(text)**beta.
        combined = [
            scores[pos] * len(body) ** self.beta + self.alpha * distances[pos]
            for pos, body in enumerate(candidates['Text'].to_list())
        ]

        ranking = pd.DataFrame().assign(NewScores=combined, Indexes=indexes)
        ordered = ranking.sort_values(by=['NewScores'])

        limit = self.config.db_config.search.vector_search.k_neighbors
        return ordered['Indexes'].to_list()[:limit]