# hebrew-dentsit / rag_agent.py
# Commit a983ce0 (borodache): "Change the retrieval and reranking into two
# steps search with two different indexes - which is supposed to make the
# latency much lower (faster)"
from anthropic import Anthropic
from typing import List
import os
from retriever import Retriever
from reranker import Reranker
from text_embedder_encoder import TextEmbedder, encoder_model_name
retriever = Retriever()
reranker = Reranker()
class RAGAgent:
    """Hebrew dental-assistant RAG agent.

    Embeds the user's question, retrieves candidate answers, reranks them,
    and asks an Anthropic Claude model to answer in Hebrew using the
    reranked context. A rolling Hebrew summary of the conversation is
    maintained and folded into both retrieval and prompting.
    """

    def __init__(
        self,
        retriever=retriever,
        reranker=reranker,
        anthropic_api_key=None,
        model_name: str = "claude-3-5-sonnet-20241022",
        max_tokens: int = 1024,
        temperature: float = 0.0,
    ):
        """Create the agent.

        Args:
            retriever: vector-search component (defaults to the module singleton).
            reranker: reranking component (defaults to the module singleton).
            anthropic_api_key: API key; when None, read from the
                ``anthropic_api_key`` environment variable at construction time.
            model_name: Claude model identifier.
            max_tokens: response token budget for answers.
            temperature: sampling temperature (0.0 = deterministic).
        """
        self.retriever = retriever
        self.reranker = reranker
        # Resolve the key lazily here instead of in the default expression:
        # the original default os.environ["anthropic_api_key"] was evaluated
        # at import time and raised KeyError when the env var was missing,
        # even for callers that passed an explicit key.
        if anthropic_api_key is None:
            anthropic_api_key = os.environ["anthropic_api_key"]
        self.client = Anthropic(api_key=anthropic_api_key)
        self.model_name = model_name
        self.max_tokens = max_tokens
        self.temperature = temperature
        self.text_embedder = TextEmbedder()
        # Rolling Hebrew summary of the dialogue so far; empty until the
        # first exchange completes.
        self.conversation_summary = ""
        # Raw user/assistant message history (fallback summary source).
        self.messages = []

    def get_context(self, query: str) -> List[str]:
        """Return reranked context passages for *query*.

        Embeds the query once, fetches candidate answer ids from the
        retriever, then reranks those candidates with the same query vector.
        """
        query_vector = self.text_embedder.encode(query)
        retrieved_answers_ids = self.retriever.search_similar(query_vector)
        context = self.reranker.rerank(query_vector, retrieved_answers_ids)
        return context

    def generate_prompt(self, context: List[str], conversation_summary: str = "") -> str:
        """Build the Hebrew system prompt from *context* passages and the
        optional running *conversation_summary*."""
        context = "\n".join(context)
        summary_context = f"\nסיכום השיחה עד כה:\n{conversation_summary}" if conversation_summary else ""
        prompt = f"""
אתה רופא שיניים, דובר עברית בלבד. קוראים לך 'רופא השיניים האלקטרוני העברי הראשון'.{summary_context}
ענה למטופל על השאלה שלו על סמך הקונטקס הבא: {context}.
הוסף כמה שיותר פרטים, ודאג שהתחביר יהיה תקין ויפה.
תעצור כשאתה מרגיש שמיצית את עצמך. אל תמציא דברים.
ואל תענה בשפות שהן לא עברית.
"""
        return prompt

    def update_summary(self, question: str, answer: str) -> str:
        """Ask the model for an updated Hebrew conversation summary that
        merges the previous summary with the new question/answer pair.

        Falls back to :meth:`get_basic_summary` on any API failure so a
        summarization hiccup never breaks the main answer flow.
        """
        summary_prompt = {
            "model": self.model_name,
            # Summaries are capped well below answer length on purpose.
            "max_tokens": 500,
            "temperature": 0.0,
            "messages": [
                {
                    "role": "user",
                    "content": f"""סכם את השיחה בעברית, הנה סיכום השיחה עד כה:
{self.conversation_summary if self.conversation_summary else "אין שיחה קודמת."}
אינטראקציה חדשה:
שאלת המטופל: {question}
תשובת הרופא: {answer}
אנא ספק סיכום מעודכן שכולל את המידע הרפואי מהסיכום הקודם בנוסף לדגש על האינטרקציה החדשה. הסיכום צריך להיות תמציתי עד 100 מילה.
ותר על מידע לא רלוונטי מהסיכומים הקודמים"""
                }
            ]
        }
        try:
            response = self.client.messages.create(**summary_prompt)
            self.conversation_summary = response.content[0].text
            return self.conversation_summary
        except Exception as e:
            print(f"Error updating summary: {e}")
            return self.get_basic_summary()

    def get_basic_summary(self) -> str:
        """Fallback summary: replay stored Q/A pairs verbatim from history."""
        summary = []
        # messages holds alternating user/assistant entries; walk them in pairs.
        for i in range(0, len(self.messages), 2):
            if i + 1 < len(self.messages):
                summary.append(f"שאלת המטופל: {self.messages[i]['content']}")
                summary.append(f"תשובת הרופא שיניים: {self.messages[i + 1]['content']}\n")
        return "\n".join(summary)

    def get_response(self, question: str) -> str:
        """Answer *question*: retrieve + rerank context, query Claude, record
        the exchange, and refresh the conversation summary."""
        # Separate question and summary with a newline so the embedded text
        # does not glue the question's last word onto the summary's first.
        context = self.get_context(f"{question}\n{self.conversation_summary}")
        prompt = self.generate_prompt(context, self.conversation_summary)
        # The instruction prompt goes in the dedicated `system` parameter:
        # the Messages API requires the first message to be user-role, so the
        # previous leading {"role": "assistant"} message was rejected.
        response = self.client.messages.create(
            model=self.model_name,
            max_tokens=self.max_tokens,
            temperature=self.temperature,
            system=prompt,
            messages=[
                {"role": "user", "content": f"{question}"}
            ]
        )
        answer = response.content[0].text
        # Keep raw history for the fallback summary.
        self.messages.extend([
            {"role": "user", "content": question},
            {"role": "assistant", "content": answer}
        ])
        self.update_summary(question, answer)
        return answer