import logging
from typing import Any
from openai import Stream
from rag_utils.rag_utils import calc_n_tokens
from .setup_load import OpenAI
# from langsmith import traceable
logger = logging.getLogger()
SYSTEM_PROMPT = """
You are an AI assistant that helps answer questions by searching through video transcripts.
I have retrieved the transcripts most likely to answer the user's question.
Carefully read through the transcripts to find information that helps answer the question.
Be brief - your response should not be more than two paragraphs.
Only use information directly stated in the provided transcripts to answer the question.
Do not add any information or make any claims that are not explicitly supported by the transcripts.
If the transcripts do not contain enough information to answer the question, state that you do not have enough information to provide a complete answer.
Format the response clearly. If only one of the transcripts answers the question, don't reference the others or explain why their content is irrelevant.
Do not speak in the first person. DO NOT write a letter, an introduction, or a salutation.
Reference the speaker's name when you report what they said.
"""
def set_messages(system_prompt: str, user_prompt: str) -> tuple[list[dict[str, str]], int]:
"""
Set the messages for the chat completion.
Args:
system_prompt (str): The system prompt.
user_prompt (str): The user prompt.
Returns:
tuple[list[dict[str, str]], int]: A tuple containing the messages and the total number of input tokens.
"""
messages1 = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt},
]
n_system_tokens = calc_n_tokens(system_prompt)
n_user_tokens = calc_n_tokens(user_prompt)
n_input_tokens = n_system_tokens + n_user_tokens
logger.info(
f"System Prompt is {n_system_tokens} tokens, User Prompt is {n_user_tokens} tokens"
)
return messages1, n_input_tokens
def make_user_prompt(question: str, keep_texts: list[dict[str, Any]]) -> str:
    """
    Create the user prompt based on the question and the retrieved transcripts.
    Args:
        question (str): The user's question.
        keep_texts (list[dict[str, Any]]): The retrieved transcripts, each a dict with at least "text" and "Speaker" keys.
    Returns:
        str: The user prompt.
    """
user_prompt = f"""
Question: {question}
==============================
"""
if len(keep_texts) > 0:
list_strs = []
for i, tx_val in enumerate(keep_texts):
text0 = tx_val["text"]
speaker_name = tx_val["Speaker"]
list_strs.append(f"Video Transcript {i+1}\nSpeaker: {speaker_name}\n{text0}")
user_prompt += "\n-------\n".join(list_strs)
user_prompt += """
==============================
After analyzing the above video transcripts, please provide a helpful answer to my question. Remember to stay within two paragraphs.
Address the response to me directly. Do not use any information not explicitly supported by the transcripts. Remember to reference the speaker's name."""
else:
# If no relevant transcripts are found, generate a default response
user_prompt += "No relevant video transcripts were found. Please just return a result that says something like 'I'm sorry, but the answer to {Question} was not found in the transcripts from the R/Gov Conference'"
# logger.info(f'User prompt: {user_prompt}')
return user_prompt
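# Illustrative sketch only (not used by the app): the retrieval step is expected
# to hand make_user_prompt a list of dicts carrying at least "text" and "Speaker"
# keys. The sample entry below is invented purely for illustration.
#
#   example_texts = [
#       {"text": "We deployed the model as a Shiny app.", "Speaker": "Jane Doe"},
#   ]
#   print(make_user_prompt("How was the model deployed?", example_texts))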
# def concatenate_strings(outputs: list):
# return "".join(outputs)
# @traceable(reduce_fn=concatenate_strings)
def do_1_query(messages1: list[dict[str, str]], gen_client: OpenAI, stream: bool = False):
    """
    Generate a response using the specified chat completion model.
    Args:
        messages1 (list[dict[str, str]]): The messages for the chat completion.
        gen_client (OpenAI): The generation client (OpenAI).
        stream (bool): Whether to stream the response. Defaults to False.
    Returns:
        The full chat completion when stream is False, otherwise the response stream.
    """
model1 = "gpt-4o"
# Generate the response using the specified model
response1 = gen_client.chat.completions.create(
messages=messages1, model=model1, seed=18, temperature=0, stream=stream
)
return response1
def text_from_response_static(response1) -> str:
    """Extract the full message text from a non-streaming chat completion."""
    return response1.choices[0].message.content
def text_from_response_stream(response1: Stream):
    """Yield the text content of each chunk of a streaming chat completion."""
for chunk in response1:
content = chunk.choices[0].delta.content
if content is not None:
yield content
def do_generation(
    query1: str, keep_texts: list[dict[str, Any]], gen_client: OpenAI, stream: bool = False
):
    """
    Generate the chatbot response using the specified generation client.
    Args:
        query1 (str): The user's query.
        keep_texts (list[dict[str, Any]]): The retrieved relevant texts.
        gen_client (OpenAI): The generation client (OpenAI).
        stream (bool): Whether to stream the response. Defaults to False.
    Returns:
        tuple: The response text (a generator of text chunks when stream is True,
            otherwise a single string) and the number of prompt tokens.
    """
user_prompt = make_user_prompt(query1, keep_texts=keep_texts)
messages1, prompt_tokens = set_messages(SYSTEM_PROMPT, user_prompt)
response = do_1_query(messages1, gen_client, stream=stream)
if stream:
response1 = text_from_response_stream(response1=response)
else:
response1 = text_from_response_static(response1=response)
return response1, prompt_tokens
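if __name__ == "__main__":
    # Minimal smoke-test sketch, assuming OPENAI_API_KEY is set and that
    # `.setup_load.OpenAI` re-exports the standard openai.OpenAI client class.
    # Because of the relative import above, run this as a module
    # (python -m <package>.<this_module>). The transcript entry is invented
    # purely for illustration.
    _client = OpenAI()
    _docs = [{"text": "We deployed the model as a Shiny app.", "Speaker": "Jane Doe"}]
    _answer, _n_tokens = do_generation("How was the model deployed?", _docs, _client)
    print(f"Prompt tokens: {_n_tokens}")
    print(_answer)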