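"""Researcher agent module.

Builds a Tavily-backed OpenAI-functions agent with LangChain and exposes a
streaming entry point (research_topic_stream), a blocking one (research_topic),
and a helper for splitting long answers (chunk_text).
"""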
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain.agents import AgentExecutor, create_openai_functions_agent
from decouple import config
from typing import AsyncGenerator, List
import os
import json

# Get API keys from the environment (.env via python-decouple)
TAVILY_API_KEY = config('TAVILY_API_KEY')
OPENAI_API_KEY = config('OPENAI_API_KEY')

# Debug logging: print only a short key prefix, never the full secret
print(f"\nLoaded OpenAI API Key: {OPENAI_API_KEY[:7]}...")
print(f"Key starts with 'sk-proj-': {OPENAI_API_KEY.startswith('sk-proj-')}")
print(f"Key starts with 'sk-': {OPENAI_API_KEY.startswith('sk-')}\n")

# Set Tavily API key in the environment
os.environ["TAVILY_API_KEY"] = TAVILY_API_KEY

# Initialize the search tool
search_tool = TavilySearchResults(tavily_api_key=TAVILY_API_KEY)

# Description of available tools, interpolated into the prompt below
tools_description = """
Available tools:
- TavilySearchResults: A search tool that provides comprehensive web search results. Use this to gather information about topics.
"""

# Create the prompt template
researcher_prompt = ChatPromptTemplate.from_messages([
    ("system", """You are an expert researcher tasked with gathering comprehensive information on given topics.
Your goal is to provide detailed, factual information limited to 500 words.
Focus on key points, recent developments, and verified facts.
Structure your response clearly with main points and supporting details.
Keep your response concise and focused.
{tools}
Remember to provide accurate and up-to-date information."""),
    ("user", "{input}"),
    # The scratchpad must be a MessagesPlaceholder so the agent can append its
    # function-call messages; a plain ("assistant", "{agent_scratchpad}") string
    # template would stringify the message list and break the agent loop.
    MessagesPlaceholder(variable_name="agent_scratchpad"),
])

# Initialize the LLM with streaming enabled
researcher_llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0.3,
    api_key=OPENAI_API_KEY,
    streaming=True
)

# Create the agent
researcher_agent = create_openai_functions_agent(
    llm=researcher_llm,
    prompt=researcher_prompt,
    tools=[search_tool]
)

# Create the agent executor
researcher_executor = AgentExecutor(
    agent=researcher_agent,
    tools=[search_tool],
    verbose=True,
    handle_parsing_errors=True,
    return_intermediate_steps=True
)
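
# Note: with return_intermediate_steps=True the executor's final payload also
# carries the (action, observation) pairs under "intermediate_steps", which
# research_topic_stream() below forwards to callers as "intermediate" events.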

def chunk_text(text: str, max_length: int = 3800) -> List[str]:
    """Split text into chunks of roughly max_length characters while preserving
    sentence boundaries. Only sentence lengths are counted, so a chunk may run a
    few characters over max_length once the joining spaces are added.
    """
    # Naive sentence split on '.': trim whitespace and restore the period
    sentences = [s.strip() for s in text.split('.')]
    sentences = [s + '.' for s in sentences if s]

    chunks = []
    current_chunk = []
    current_length = 0

    for sentence in sentences:
        sentence_length = len(sentence)
        if current_length + sentence_length > max_length:
            if current_chunk:
                # Flush the accumulated sentences as one chunk and start a new one
                chunks.append(' '.join(current_chunk))
                current_chunk = [sentence]
                current_length = sentence_length
            elif sentence_length > max_length:
                # A single sentence longer than max_length: split it on word boundaries
                words = sentence.split()
                temp_chunk = []
                temp_length = 0
                for word in words:
                    if temp_length + len(word) + 1 > max_length:
                        chunks.append(' '.join(temp_chunk))
                        temp_chunk = [word]
                        temp_length = len(word)
                    else:
                        temp_chunk.append(word)
                        temp_length += len(word) + 1  # +1 for the joining space
                if temp_chunk:
                    chunks.append(' '.join(temp_chunk))
            else:
                chunks.append(sentence)
        else:
            current_chunk.append(sentence)
            current_length += sentence_length

    if current_chunk:
        chunks.append(' '.join(current_chunk))
    return chunks
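
# Usage sketch (hypothetical names): split a long research answer so each piece
# fits a ~3800-character consumer, e.g. a message-size-limited channel.
#
#   pieces = chunk_text(long_answer)
#   for piece in pieces:
#       publish(piece)  # hypothetical downstream consumer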

async def research_topic_stream(topic: str) -> AsyncGenerator[str, None]:
    """
    Research a topic and stream the results, as newline-delimited JSON events,
    while they are generated.
    """
    try:
        async for chunk in researcher_executor.astream(
            {
                "input": f"Research this topic thoroughly: {topic}",
                "tools": tools_description
            }
        ):
            if isinstance(chunk, dict):
                # Stream intermediate steps (tool calls and observations) for transparency
                if "intermediate_steps" in chunk:
                    for step in chunk["intermediate_steps"]:
                        yield json.dumps({"type": "intermediate", "content": str(step)}) + "\n"
                # Stream the final output
                if "output" in chunk:
                    yield json.dumps({"type": "final", "content": chunk["output"]}) + "\n"
            else:
                yield json.dumps({"type": "chunk", "content": str(chunk)}) + "\n"
    except Exception as e:
        yield json.dumps({"type": "error", "content": str(e)}) + "\n"

async def research_topic(topic: str) -> str:
    """
    Research a topic and return the complete result.
    Kept for compatibility with existing code.
    """
    try:
        result = await researcher_executor.ainvoke(
            {
                "input": f"Research this topic thoroughly: {topic}",
                "tools": tools_description
            }
        )
        return result["output"]
    except Exception as e:
        print(f"Error in research: {str(e)}")
        return "Error occurred during research."