Update app.py
Browse files
app.py
CHANGED
@@ -1,14 +1,104 @@
|
|
1 |
-
import
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
import pandas as pd

from sentence_transformers import SentenceTransformer

# from sklearn.decomposition import PCA
from sklearn.metrics import DistanceMetric

# import matplotlib.pyplot as plt
# import matplotlib as mpl
import warnings
# Blanket warning suppression — hides library deprecation noise but also real issues.
warnings.filterwarnings("ignore")
import gradio as gr
# from transformers.utils.hub import move_cache
#
# move_cache()
# from mailersend import emails

import asyncio
from telegram import Bot

# SECURITY: Telegram bot token and chat id are hardcoded in source. This credential
# is effectively public (committed to the repo) and should be rotated immediately
# and loaded from an environment variable / secrets store instead.
token = '7370905765:AAHvmlw68cW7RxWzsJE1yxTzgf3xQFiokDo'
chat_id = '7431171535'
# Show full cell contents when rendering DataFrame rows to strings (no truncation).
pd.set_option('display.max_colwidth', None)
# Q&A knowledge base; assumes columns include 'question' and 'answer' — see rag_chain.
# NOTE(review): latin-1 decoding presumably works around a non-UTF-8 export — confirm.
df_resume = pd.read_csv('QA.csv',encoding='latin-1')
# df_resume['role'][df_resume['role'].iloc[-1] == df_resume['role']] = "Other" # relabel random role as "other"
print(df_resume.head())
# Sentence embedding model used both to build and to query the vector store.
model = SentenceTransformer("all-MiniLM-L6-v2")
# file_name = 'questions.txt'
|
30 |
+
|
31 |
+
async def send_telegram_message(token, chat_id, message):
    """Deliver *message* to Telegram chat *chat_id* using the bot *token*.

    Raises whatever the Telegram client raises on API/network failure.
    """
    await Bot(token=token).send_message(chat_id=chat_id, text=message)
    print("Message sent successfully.")
|
35 |
+
|
36 |
+
|
37 |
+
def savevectorstore(path='embeddings.npy'):
    """Embed every question in the knowledge base and persist the vectors.

    Encodes ``df_resume['question']`` with the module-level sentence-transformer
    ``model`` and saves the resulting 2-D float array to *path* (NumPy ``.npy``).

    Parameters
    ----------
    path : str, optional
        Destination file for the embedding matrix. Defaults to
        ``'embeddings.npy'``, the file ``rag_chain`` loads at query time.
    """
    # store embed vectors
    embedding_arr = model.encode(df_resume['question'])
    print(embedding_arr.shape)
    np.save(path, embedding_arr)
|
42 |
+
# savevectorstore()
|
43 |
+
# load embed vectors
|
44 |
+
def rag_chain(question,name):
    """Retrieve the most similar stored question and its answer for *question*.

    Embeds the query, ranks all precomputed question embeddings by Euclidean
    distance, and returns Markdown for the single closest match. Also sends a
    best-effort Telegram notification containing the asker's *name*, the raw
    question, and the answer text.

    Parameters
    ----------
    question : str
        Free-text user question.
    name : str
        Asker's name (only used in the Telegram notification).

    Returns
    -------
    tuple[str, str]
        ``(most relevant question markdown, answer ideas markdown)``.
    """
    # Embeddings are reloaded from disk on every call; cheap for small corpora.
    # NOTE(review): consider loading once at module level if QA.csv grows.
    embedding_arr = np.load('embeddings.npy')

    query_embedding = model.encode(question)

    dist = DistanceMetric.get_metric('euclidean')  # other distances: manhattan, chebyshev

    # compute pair wise distances between query embedding and all resume embeddings
    dist_arr = dist.pairwise(embedding_arr, query_embedding.reshape(1, -1)).flatten()
    # indices of stored questions, nearest first
    idist_arr_sorted = np.argsort(dist_arr)

    # '||' is the in-cell delimiter used in QA.csv; render it as an HTML break.
    que = f"**Most relevant question.**<br><br>{str(df_resume['question'].iloc[idist_arr_sorted[:1]].to_string(index=False)).replace('||','<br>')}"
    ans = f"**Answer ideas.**<br><br>{str(df_resume['answer'].iloc[idist_arr_sorted[:1]].to_string(index=False)).replace('||','<br>')}"

    message = f"{name}\n\n{question}\n\n{ans}"
    try:
        asyncio.run(send_telegram_message(token, chat_id, message))
    except Exception as exc:
        # The notification is best-effort: a Telegram/network failure must not
        # crash the user's query — report it and still return the answer.
        print(f"Telegram notification failed: {exc}")
    return que,ans
|
90 |
+
# rag_chain('I am very hungry.')
|
91 |
+
|
92 |
+
# NOTE(review): `desc` is defined but never used (the Interface uses its own
# `description=` literal below) — candidate for removal.
desc = "This is an awesome ML App. I'm really excited to show you"
# Footer text shown under the interface ("If I didn't answer your question,
# send it to Eric." — rendered via `article=` below).
long_desc = "如果我没有回答你的问题,把问题发给Eric吧。"
# Two text inputs (question, asker name) -> two Markdown outputs, wired to rag_chain.
search_interface = gr.Interface(
    fn=rag_chain,
    inputs=[gr.Textbox(label="Question"),gr.Textbox(label="Name")],
    outputs=[gr.Markdown(label="Most relevant question"),gr.Markdown(label="Answer ideas")],
    title="Ask Eric",
    description="Hi,我是数字分身,欢迎提问!",
    # theme=gr.themes.Glass
    article=long_desc
)

# share=True exposes a public gradio.live tunnel; debug=True blocks and streams logs.
search_interface.launch(share=True,debug=True)
|