"""Gradio Q&A app ("Ask Eric").

Retrieves the most relevant stored question/answer pair for a user's query
by sentence-embedding similarity, and forwards each query to a Telegram
chat for review.
"""

import asyncio
import os
import warnings

import numpy as np
import pandas as pd
import gradio as gr
from sentence_transformers import SentenceTransformer
from sklearn.metrics import DistanceMetric
from telegram import Bot

warnings.filterwarnings("ignore")

# SECURITY: the bot token and chat id were previously hard-coded here and
# committed to source. Read them from the environment instead, and rotate
# the old token — it must be considered compromised.
token = os.environ.get("TELEGRAM_BOT_TOKEN", "")
chat_id = os.environ.get("TELEGRAM_CHAT_ID", "")

pd.set_option('display.max_colwidth', None)

# Knowledge base: one row per stored question with its answer ideas.
# NOTE(review): latin-1 encoding looks deliberate — confirm QA.csv's actual encoding.
df_resume = pd.read_csv('QA.csv', encoding='latin-1')
print(df_resume.head())

# Compact general-purpose sentence encoder used for both the stored
# questions and incoming queries.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Question-embedding matrix, loaded lazily on the first query and cached
# so the per-request path does not re-read the file from disk.
_embedding_arr = None


async def send_telegram_message(token, chat_id, message):
    """Send *message* to *chat_id* via the Telegram bot identified by *token*."""
    bot = Bot(token=token)
    await bot.send_message(chat_id=chat_id, text=message)
    print("Message sent successfully.")


def savevectorstore():
    """Encode every stored question and persist the matrix to embeddings.npy.

    Run once (offline) whenever QA.csv changes; rag_chain() reads the file
    this writes.
    """
    embedding_arr = model.encode(df_resume['question'])
    print(embedding_arr.shape)
    np.save('embeddings.npy', embedding_arr)


# savevectorstore()  # uncomment and run once after editing QA.csv


def rag_chain(question, name):
    """Return (matched question, answer ideas) as two markdown strings.

    Finds the nearest stored question to *question* by Euclidean distance
    in embedding space, notifies the Telegram chat (best effort — failures
    are logged, not raised), and returns the markdown for the two Gradio
    output panes.

    Parameters
    ----------
    question : str  — the user's question.
    name : str      — the user's name, included in the Telegram message.
    """
    global _embedding_arr
    if _embedding_arr is None:
        # Hoisted out of the per-request path: load the store exactly once.
        _embedding_arr = np.load('embeddings.npy')

    query_embedding = model.encode(question)

    # Pairwise Euclidean distances between the query and every stored question.
    dist = DistanceMetric.get_metric('euclidean')
    dist_arr = dist.pairwise(_embedding_arr, query_embedding.reshape(1, -1)).flatten()
    idist_arr_sorted = np.argsort(dist_arr)

    top = idist_arr_sorted[:1]
    # '||' is the row-internal delimiter used in QA.csv; render it as a
    # line break in the markdown output.
    que_text = df_resume['question'].iloc[top].to_string(index=False).replace('||', '\n')
    ans_text = df_resume['answer'].iloc[top].to_string(index=False).replace('||', '\n')
    que = f"**Most relevant question.**\n\n{que_text}"
    ans = f"**Answer ideas.**\n\n{ans_text}"

    # Best-effort notification: a Telegram outage or missing credentials
    # must not prevent the user from getting their answer.
    try:
        message = f"{name}\n\n{question}\n\n{ans}"
        asyncio.run(send_telegram_message(token, chat_id, message))
        print('Successfully sent!')
    except Exception as exc:
        print(f"Telegram notification failed: {exc}")

    return que, ans


desc = "This is an awesome ML App. I'm really excited to show you"
# "If I didn't answer your question, send it to Eric."
long_desc = "如果我没有回答你的问题,把问题发给Eric吧。"

search_interface = gr.Interface(
    fn=rag_chain,
    inputs=[gr.Textbox(label="Question"), gr.Textbox(label="Name")],
    outputs=[
        gr.Markdown(label="Most relevant question"),
        gr.Markdown(label="Answer ideas"),
    ],
    title="Ask Eric",
    description="Hi,我是数字分身,欢迎提问!",
    article=long_desc,
)

search_interface.launch(share=True, debug=True)