File size: 2,361 Bytes
7b86ace
 
 
 
 
 
 
 
3326af6
50ed058
7b86ace
50ed058
7b86ace
 
 
 
3326af6
50ed058
7b86ace
 
 
50ed058
7b86ace
 
 
 
 
 
 
50ed058
7b86ace
 
 
 
 
 
 
 
 
 
 
 
 
50ed058
7b86ace
 
 
 
d99520f
7b86ace
 
 
 
 
 
 
 
 
50ed058
7b86ace
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#%%
import pandas as pd
import numpy as np
import torch
from sentence_transformers.util import cos_sim
from sentence_transformers import SentenceTransformer
import gradio as gr
#%%
# Sentence encoder used by get_recommend to embed the incoming text.
model = SentenceTransformer("sentence-transformers/multi-qa-distilbert-cos-v1")

# Tabular inputs read from CSV. `etalon` is loaded here but not referenced
# anywhere else in the visible part of this file.
etalon = pd.read_csv("etalon_prod.csv")
df = pd.read_csv("preprocessed_complaints.csv")

# Contents of the 'Жалобы' column as a plain Python list (not referenced
# elsewhere in the visible part of this file).
unique_complaints = df["Жалобы"].values.tolist()

# Precomputed embedding matrix compared against each query.
# NOTE(review): presumably one row per record of `df`, in the same order -
# confirm against the script that produced embeddings.npy.
embeddings = np.load("embeddings.npy")

def _most_frequent(values, limit):
    """Return up to *limit* distinct entries of *values*, most frequent first."""
    return values.value_counts().iloc[:limit].index.tolist()


def get_recommend(user_input: str,
                  top_k_spec: int = 3,
                  top_k_services: int = 5,
                  treshold: float = 0.8) -> dict:
    """Build recommendations from the records most similar to *user_input*.

    The text is embedded with the module-level ``model`` and compared, by
    cosine similarity, with the module-level ``embeddings``. Rows of ``df``
    whose similarity is strictly greater than *treshold* are kept, ordered
    from most to least similar, and the most frequent values of several
    columns of those rows are reported.

    Args:
        user_input: Text to encode and match.
        top_k_spec: Maximum number of values returned for each of the
            'Специальность врача' and 'Рекомендуемые специалисты' columns.
        top_k_services: Maximum number of values returned for 'Жалобы',
            'Диагноз МКБ' and for each examination category.
        treshold: Minimum (exclusive) cosine similarity a row must reach.
            The misspelled name is kept so existing keyword callers still work.

    Returns:
        A dict keyed by column name. Every value is a list of the most
        frequent entries, except 'Рекомендации по обследованию', which is a
        list of single-key dicts, one per examination category. All lists
        are empty when no row exceeds *treshold*.
    """
    specialist_columns = ["Специальность врача",
                          "Рекомендуемые специалисты"]
    service_categories = ['Инструментальная диагностика', 'Лабораторная диагностика']

    query_embedding = model.encode(user_input)

    # cos_sim returns a 2-D result; the first row holds this query's scores.
    scores = cos_sim(query_embedding, embeddings).detach().numpy()[0]

    # Rank once and reuse the permutation for the scores themselves instead
    # of sorting the same array a second time.
    order = scores.argsort()[::-1]
    ranked_scores = scores[order]

    # argsort yields positions, so select rows positionally; label-based
    # lookup would only be correct while df keeps its default 0..N-1 index.
    matches = df.iloc[order].copy()
    matches['cos_sim'] = ranked_scores
    matches = matches[matches['cos_sim'] > treshold]

    result = {
        column: _most_frequent(matches[column], top_k_spec)
        for column in specialist_columns
    }
    result['Жалобы'] = _most_frequent(matches['Жалобы'], top_k_services)
    result['Диагноз МКБ'] = _most_frequent(matches['Диагноз МКБ'], top_k_services)

    # One single-key dict per category, in the fixed order listed above.
    result['Рекомендации по обследованию'] = [
        {
            category: _most_frequent(
                matches.loc[matches['service_name_category'] == category,
                            'Рекомендации по обследованию'],
                top_k_services,
            )
        }
        for category in service_categories
    ]

    return result
#%%
# Web UI: one text box feeds get_recommend and the returned dict is rendered
# as JSON. No title is set; an earlier "top-10 most similar services" title
# was left disabled.
gradio_app = gr.Interface(
    fn=get_recommend,
    inputs='text',
    outputs=gr.JSON(label='Рекомендации: '),
    description="Введите услугу:",
)

# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    gradio_app.launch()
# %%