File size: 2,315 Bytes
7b86ace
 
 
 
 
 
 
 
3326af6
 
7b86ace
 
e324e37
7b86ace
 
 
 
3326af6
7b86ace
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#%%
import pandas as pd
import numpy as np
import torch
from sentence_transformers.util import cos_sim
from sentence_transformers import SentenceTransformer
import gradio as gr
#%%
# Reference ("etalon") table. It is loaded here but not read anywhere else
# in this part of the file.
etalon = pd.read_csv("etalon_prod.csv")

# Training table; only rows flagged with is_match == 1 are kept.
df = pd.read_csv("preprocessed_train_classify_rec_spec_filtered_by_etalon.csv")
df = df.loc[df['is_match'] == 1]

# Multilingual sentence encoder used to embed the user query at request time.
model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')

# Complaint texts of the retained rows, in row order.
unique_complaints = df['Жалобы'].values.tolist()

# Pre-computed embeddings stored on disk.
# NOTE(review): presumably one embedding per retained row of `df`, in the
# same order as `unique_complaints` -- confirm against the script that
# produced embeddings.npy.
unique_complaints_embeddings_st = np.load("embeddings.npy")

def get_recommend(user_input, 
                  top_k_spec = 3,
                  top_k_services = 10,
                  treshold = 0.8):
    """Recommend specialists, similar complaints and examinations for a query.

    The query is encoded with the module-level ``model`` and compared by
    cosine similarity with ``unique_complaints_embeddings_st``. Rows of
    ``df`` whose similarity is strictly greater than ``treshold`` are kept,
    and the most frequent values among those rows are reported.

    Args:
        user_input: Text to encode. Only the first row of the similarity
            matrix is used, so a single string is expected.
        top_k_spec: Maximum number of values returned for each of the
            "Специальность врача" and "Рекомендуемые специалисты" columns.
        top_k_services: Maximum number of complaints returned, and maximum
            number of examinations returned per diagnostic category.
        treshold: Similarity cut-off (exclusive). The misspelled name is
            kept so existing keyword callers continue to work.

    Returns:
        dict: For each of the two specialist columns and for "Жалобы", a
        list of the most frequent values (most frequent first). Under
        "Рекомендации по обследованию", a list of single-key dicts mapping
        each diagnostic category to its most frequent examinations.
    """
    cols_for_top_k = ["Специальность врача",
                      "Рекомендуемые специалисты"]

    usr_embeddings = model.encode(user_input)

    # Similarity of the query to every stored embedding; .cpu() is a no-op
    # for CPU tensors and keeps .numpy() valid if the tensor lives elsewhere.
    similarities = cos_sim(
        usr_embeddings, unique_complaints_embeddings_st
    ).detach().cpu().numpy()[0]

    # Positions of the embeddings, most similar first.
    order = similarities.argsort()[::-1]

    # `order` holds positions, not index labels. `df` was filtered at module
    # level without resetting its index, so label-based `.loc` would raise
    # KeyError or select the wrong rows; `.iloc` selects by position.
    # NOTE(review): assumes the embeddings are row-aligned with `df` -- confirm.
    sorted_df = df.iloc[order].copy()
    # Index the scores with the same ordering so rows and scores stay paired.
    sorted_df['cos_sim'] = similarities[order]
    sorted_df = sorted_df[sorted_df['cos_sim'] > treshold]

    result = {}
    for col in cols_for_top_k:
        result[col] = sorted_df[col].value_counts().iloc[:top_k_spec].index.tolist()
    result['Жалобы'] = sorted_df['Жалобы'].value_counts().iloc[:top_k_services].index.tolist()

    # One single-key dict per diagnostic category, selected via the 'preds' column.
    categories = ['Инструментальная диагностика', 'Лабораторная диагностика']
    lst = []
    for category in categories:
        in_category = sorted_df[sorted_df['preds'] == category]
        list_top_k_services = (
            in_category['Рекомендации по обследованию']
            .value_counts()
            .iloc[:top_k_services]
            .index.tolist()
        )
        lst.append({category: list_top_k_services})

    result['Рекомендации по обследованию'] = lst

    return result
#%%
# Web UI: one free-text input is passed to get_recommend as `user_input`
# (the remaining parameters keep their defaults); the returned dict is
# rendered as JSON.
# The interface title is intentionally left disabled
# (previously: "Top-10 most similar services prediction").
gradio_app = gr.Interface(
    fn=get_recommend,
    inputs='text',
    outputs=gr.JSON(label='s'),
    description="Введите услугу:",
)

# Start the server only when the file is executed as a script.
if __name__ == "__main__":
    gradio_app.launch()
# %%