# Spaces: Sleeping
#%% | |
import pandas as pd | |
import numpy as np | |
import torch | |
from sentence_transformers.util import cos_sim | |
from sentence_transformers import SentenceTransformer | |
import gradio as gr | |
#%% | |
# Reference tables, read from CSV files in the working directory.
etalon = pd.read_csv("etalon_prod.csv")
df = pd.read_csv("preprocessed_complaints.csv")

# Sentence-embedding model used to encode the user's query text.
model = SentenceTransformer("sentence-transformers/multi-qa-distilbert-cos-v1")

# Values of the complaints column ("Жалобы") as a plain Python list.
unique_complaints = df["Жалобы"].values.tolist()

# Embedding matrix that get_recommend compares each query against.
# NOTE(review): presumably row i corresponds to row i of `df` -- confirm.
embeddings = np.load("embeddings.npy")
def _top_values(series, k):
    """Return the ``k`` most frequent values of ``series``, most common first."""
    return series.value_counts().head(k).index.tolist()


def get_recommend(user_input,
                  top_k_spec=3,
                  top_k_services=5,
                  treshold=0.8):
    """Build recommendations from the rows most similar to ``user_input``.

    The query is encoded with the module-level ``model`` and compared by
    cosine similarity against the module-level ``embeddings``.  Rows of
    ``df`` whose similarity is strictly greater than ``treshold`` are kept
    (ordered from most to least similar) and the most frequent values of
    several columns are reported.

    Args:
        user_input: Text to encode and match.
        top_k_spec: Maximum number of values returned for the doctor
            specialty and recommended-specialists columns.
        top_k_services: Maximum number of values returned for complaints,
            ICD diagnoses and each examination category.
        treshold: Minimum (exclusive) cosine similarity for a row to be
            considered.  The spelling is kept so keyword callers still work.

    Returns:
        dict: One list of values per column, plus the key
        'Рекомендации по обследованию' holding a list of single-key dicts
        (``{category: [recommendations]}``), one per diagnostic category.
        All lists are empty when no row exceeds the threshold.
    """
    query_embedding = model.encode(user_input)

    # cos_sim yields a 2-D similarity matrix; row 0 belongs to the query.
    scores = cos_sim(query_embedding, embeddings).detach().numpy()[0]

    # Row positions ordered from most to least similar, then restricted to
    # those above the threshold *before* touching the DataFrame, so only the
    # matching rows are materialised instead of a sorted copy of all of `df`.
    order = scores.argsort()[::-1]
    keep = order[scores[order] > treshold]

    # argsort returns positions, not index labels, hence iloc rather than loc.
    # NOTE(review): assumes embeddings[i] was computed from df row i -- confirm.
    matches = df.iloc[keep]

    result = {
        col: _top_values(matches[col], top_k_spec)
        for col in ("Специальность врача", "Рекомендуемые специалисты")
    }
    result['Жалобы'] = _top_values(matches['Жалобы'], top_k_services)
    result['Диагноз МКБ'] = _top_values(matches['Диагноз МКБ'], top_k_services)

    # Examination recommendations are reported separately per category
    # (instrumental diagnostics, laboratory diagnostics).
    categories = ('Инструментальная диагностика', 'Лабораторная диагностика')
    result['Рекомендации по обследованию'] = [
        {
            category: _top_values(
                matches.loc[
                    matches['service_name_category'] == category,
                    'Рекомендации по обследованию',
                ],
                top_k_services,
            )
        }
        for category in categories
    ]
    return result
#%% | |
# Web UI: a single text box feeds get_recommend and the returned dict is
# rendered as JSON.  Only one input component is declared, so the remaining
# parameters of get_recommend presumably fall back to their defaults.
# NOTE: a title ("Prediction of the top-10 most similar services") was left
# disabled in the original configuration.
gradio_app = gr.Interface(
    fn=get_recommend,
    inputs="text",
    outputs=gr.JSON(label="Рекомендации: "),
    description="Введите услугу:",
)

# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    gradio_app.launch()
# %% | |