import pandas as pd
from common.configuration import Configuration


class DocumentRanking:
    """Re-rank candidate documents using score, text length and title distance.

    The ranker reads two weights from ``config.db_config.ranker``:
    ``alpha`` scales the squared distance between a row's title embedding
    and the query embedding, and ``beta`` is the exponent applied to the
    length of the row's text.
    """

    def __init__(self, df: pd.DataFrame, config: Configuration):
        """Store the document frame, the configuration and the ranker weights.

        Args:
            df: Frame holding at least the ``TitleEmbedding`` and ``Text``
                columns read by :meth:`doc_ranking`.
            config: Project configuration; ``db_config.ranker.alpha`` and
                ``db_config.ranker.beta`` are read here, and
                ``db_config.search.vector_search.k_neighbors`` is read at
                ranking time.
        """
        ranker_settings = config.db_config.ranker
        self.df = df
        self.config = config
        self.alpha = ranker_settings.alpha
        self.beta = ranker_settings.beta

    def doc_ranking(self, query_embedding, scores, indexes):
        """Return the candidate indexes reordered by the combined metric.

        For the i-th candidate the metric is
        ``scores[i] * len(text) ** beta + alpha * squared_distance`` where
        ``squared_distance`` is the dot product of
        ``title_embedding - query_embedding`` with itself.  Candidates are
        sorted by ascending metric and the list is cut to ``k_neighbors``
        entries.

        Args:
            query_embedding: Vector subtracted from every title embedding.
                Presumably a numpy array matching the stored embeddings --
                TODO confirm against the caller.
            scores: Per-candidate scores, indexed by position in ``indexes``.
            indexes: Positional (``iloc``) row indexes of the candidates.

        Returns:
            The values of ``indexes`` in ascending metric order, limited to
            ``config.db_config.search.vector_search.k_neighbors`` items.
        """
        candidates = self.df.iloc[indexes]

        # Squared Euclidean distance between each title and the query.
        title_vectors = candidates['TitleEmbedding'].to_list()
        offsets = (vector - query_embedding for vector in title_vectors)
        squared_distances = [offset.dot(offset) for offset in offsets]

        texts = candidates['Text'].to_list()
        combined_scores = [
            scores[position] * len(text) ** self.beta + self.alpha * distance
            for position, (text, distance) in enumerate(zip(texts, squared_distances))
        ]

        # Columns are assigned one by one onto an empty frame, as before.
        ranking = pd.DataFrame()
        ranking['NewScores'] = combined_scores
        ranking['Indexes'] = indexes

        # Ascending order: the smallest combined metric comes first.
        ordered = ranking.sort_values(by=['NewScores'])
        ordered_indexes = ordered['Indexes'].to_list()

        limit = self.config.db_config.search.vector_search.k_neighbors
        return ordered_indexes[:limit]