Spaces:
Sleeping
Sleeping
import pandas as pd | |
from common.configuration import Configuration | |
class DocumentRanking:
    """Re-rank retrieved documents by text length and title/query distance.

    Each candidate's incoming score is combined with the length of its
    ``'Text'`` value and the squared distance between its
    ``'TitleEmbedding'`` value and the query embedding. Candidates are then
    ordered by ascending combined score.
    """

    def __init__(self, df: pd.DataFrame, config: Configuration):
        """Store the document table and cache the ranker weights.

        Args:
            df: Document table; ``doc_ranking`` reads its
                ``'TitleEmbedding'`` and ``'Text'`` columns.
            config: Project configuration. ``db_config.ranker.alpha`` and
                ``db_config.ranker.beta`` are read once here, while
                ``db_config.search.vector_search.k_neighbors`` is read on
                every ``doc_ranking`` call.
        """
        self.df = df
        self.config = config
        self.alpha = config.db_config.ranker.alpha
        self.beta = config.db_config.ranker.beta

    def doc_ranking(self, query_embedding, scores, indexes) -> list:
        """Return the best-ranked entries of ``indexes``.

        The combined score of the candidate at position ``i`` is::

            scores[i] * len(text_i) ** beta + alpha * |title_i - query|^2

        where ``|.|^2`` is the dot product of the difference with itself.

        Args:
            query_embedding: Vector subtracted from every title embedding.
                Assumed to be a NumPy-like array (the difference must
                support ``.dot``) -- confirm against the caller.
            scores: Initial scores, indexed by position and aligned with
                ``indexes``.
            indexes: Positional row indexes into ``self.df`` (used with
                ``iloc``).

        Returns:
            Entries of ``indexes`` sorted by ascending combined score and
            truncated to ``k_neighbors`` items. Candidates with equal
            scores keep their relative order from ``indexes``.
        """
        # Select the candidate rows once and reuse them for both columns.
        candidates = self.df.iloc[indexes]
        title_embeddings = candidates['TitleEmbedding'].to_list()
        texts = candidates['Text'].to_list()

        new_scores = []
        for pos, (text, title_embedding) in enumerate(zip(texts, title_embeddings)):
            diff = title_embedding - query_embedding
            squared_distance = diff.dot(diff)
            new_scores.append(
                scores[pos] * len(text) ** self.beta + self.alpha * squared_distance
            )

        metric_df = pd.DataFrame({'NewScores': new_scores, 'Indexes': indexes})
        # Mergesort is stable, so ties preserve the incoming candidate order
        # instead of depending on the default quicksort's behaviour.
        ranked = metric_df.sort_values(by='NewScores', kind='mergesort')

        k_neighbors = self.config.db_config.search.vector_search.k_neighbors
        return ranked['Indexes'].to_list()[:k_neighbors]