AskEric / app.py
EricGEGE's picture
Update app.py
db0b299 verified
raw
history blame
3.81 kB
import logging
import os
import warnings

import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics import DistanceMetric

warnings.filterwarnings("ignore")

import gradio as gr
# from transformers.utils.hub import move_cache
#
# move_cache()
from telegram import Bot
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# SECURITY FIX: the original source hard-coded a live bot token and chat id
# here. Read them from the environment instead — the variable names match the
# error message already emitted in rag_chain() when they are missing. The
# previously committed token should be revoked via @BotFather.
token = os.environ.get('TELEGRAM_BOT_TOKEN', '')
chat_id = os.environ.get('TELEGRAM_CHAT_ID', '')

pd.set_option('display.max_colwidth', None)  # never truncate cell text when rendering

# FAQ table; expected to contain 'question' and 'answer' columns.
# latin-1 decoding tolerates the non-UTF-8 bytes present in the export.
df_resume = pd.read_csv('QA.csv', encoding='latin-1')
print(df_resume.head())

# Sentence-embedding model used both to build the vector store and to
# embed incoming queries.
model = SentenceTransformer("all-MiniLM-L6-v2")
def send_telegram_message(token, chat_id, message):
    """Send *message* to *chat_id* through a Telegram bot, logging the outcome.

    Best-effort: failures are logged, never raised to the caller.
    """
    try:
        bot = Bot(token=token)
        bot.send_message(chat_id=chat_id, text=message)
        logger.info("Message sent successfully.")
    # BUG FIX: the original caught `TelegramError`, a name never imported in
    # this file, so any send failure raised NameError instead of being logged.
    # Exception is the narrowest safe choice without adding a new import.
    # NOTE(review): in python-telegram-bot v20+ Bot.send_message is a
    # coroutine; this synchronous call assumes a pre-v20 install — confirm.
    except Exception as e:
        logger.error(f"Failed to send message: {e}")
def savevectorstore():
    """Embed every FAQ question and cache the vectors to 'embeddings.npy'."""
    vectors = model.encode(df_resume['question'])
    print(vectors.shape)  # sanity check: (num_questions, embedding_dim)
    np.save('embeddings.npy', vectors)
# savevectorstore()
# load embed vectors
def rag_chain(question, name):
    """Find the FAQ entry nearest to *question* and notify Eric on Telegram.

    Returns a pair of Markdown strings for the Gradio UI:
    (most relevant stored question, its suggested answer).
    """
    # Cached question embeddings written by savevectorstore().
    corpus_vectors = np.load('embeddings.npy')

    # Embed the incoming query and rank all stored questions by
    # Euclidean distance (closest first).
    query_vector = model.encode(question)
    metric = DistanceMetric.get_metric('euclidean')  # other distances: manhattan, chebyshev
    distances = metric.pairwise(corpus_vectors, query_vector.reshape(1, -1)).flatten()
    best = np.argsort(distances)[:1]

    # '||' is the row-internal line-break marker in the CSV; render as <br>.
    matched_q = str(df_resume['question'].iloc[best].to_string(index=False)).replace('||', '<br>')
    matched_a = str(df_resume['answer'].iloc[best].to_string(index=False)).replace('||', '<br>')
    que = f"**Most relevant question.**<br><br>{matched_q}"
    ans = f"**Answer ideas.**<br><br>{matched_a}"

    # Forward the interaction to Eric's Telegram chat (best-effort).
    message = f"{name}\n\n{question}\n\n{ans}"
    if not token or not chat_id:
        logger.error("Please set the TELEGRAM_BOT_TOKEN and TELEGRAM_CHAT_ID environment variables.")
    else:
        send_telegram_message(token, chat_id, message)

    return que, ans
# rag_chain('I am very hungry.')

# --- Gradio UI -------------------------------------------------------------
desc = "This is an awesome ML App. I'm really excited to show you"  # NOTE(review): not used in the visible code
long_desc = "如果我没有回答你的问题,把问题发给Eric吧。"

# Two text inputs (the visitor's question and name) map onto rag_chain's
# parameters; the two Markdown outputs mirror its (que, ans) return pair.
search_interface = gr.Interface(
    fn=rag_chain,
    title="Ask Eric",
    description="Hi,我是数字分身,欢迎提问!",
    article=long_desc,
    inputs=[
        gr.Textbox(label="Question"),
        gr.Textbox(label="Name"),
    ],
    outputs=[
        gr.Markdown(label="Most relevant question"),
        gr.Markdown(label="Answer ideas"),
    ],
    # theme=gr.themes.Glass
)
search_interface.launch(share=True, debug=True)