|
from pydantic import BaseModel,Field
|
|
from typing import TypedDict, Annotated
|
|
from langgraph.graph import MessagesState,StateGraph, START, END
|
|
from langchain_community.tools import TavilySearchResults
|
|
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
|
|
from langchain_community.document_loaders import WebBaseLoader
|
|
from langgraph.checkpoint.memory import MemorySaver
|
|
|
|
|
|
import operator
|
|
from setup import *
|
|
|
|
class GeneratorState(MessagesState):
    """Shared graph state for the use-case generator.

    Extends MessagesState, so it also carries the ``messages`` channel that
    ``generate_usecases`` writes its final report into.
    """

    # Research summaries accumulated by search nodes; operator.add makes
    # LangGraph concatenate list updates instead of overwriting them.
    context: Annotated[list, operator.add]

    # NOTE(review): never read by the nodes visible in this file, and the
    # entry point passes "max_analysts" instead — confirm intended key.
    max_usecase: int

    # Industry/topic string the search query is built from.
    topic: str
|
|
|
|
|
|
class SearchQuery(BaseModel):
    """Structured-output schema: forces the LLM to return one search query string."""

    search_query: str = Field(description='Search query for web-search')
|
|
|
|
|
|
# Pre-configured Tavily web-search tool: one rich result per query, with the
# answer, raw page content, and images included.
# NOTE(review): this tool is defined but `search_web` below calls
# `tavily_search` (presumably imported via `from setup import *`) instead —
# confirm which tool instance is actually intended.
keyword_search = TavilySearchResults(
    max_results=1,
    search_depth="advanced",
    include_answer=True,
    include_raw_content=True,
    include_images=True)
|
|
|
|
|
|
|
|
def search_web(state: GeneratorState):
    """Research the state's topic on the web and return a summary for `context`.

    Pipeline:
      1. Ask ``llm2`` (structured output) to turn ``state['topic']`` into a
         keyword-rich search query.
      2. Run the Tavily search tool, excluding known low-quality domains.
      3. Fetch every result URL with ``WebBaseLoader`` and wrap each page in
         ``<Document>`` tags.
      4. Ask ``llm2`` to summarize the combined documents.

    Args:
        state: Graph state; only ``state['topic']`` is read.

    Returns:
        dict: ``{'context': [summary_message]}`` — merged into the state's
        ``context`` list via ``operator.add``.
    """
    topic = state['topic']

    # Constrain the query-generation call to the SearchQuery schema.
    structured_llm = llm2.with_structured_output(SearchQuery)

    search_instructions = """You are an AI assistant specialized in generating effective internet search queries. Your goal is to create **precise, keyword-rich search queries** that retrieve the best results for **AI use cases in specific industries** using Tavily Search.

## **Instructions:**
- Extract the **industry name** from the user’s query.
- Generate a **focused search query** that retrieves **practical AI use cases** in that industry.
- Include keywords like **"applications," "use cases," "impact," or "case studies"** to refine the search.
- Prioritize sources like **research papers, industry reports, and authoritative tech sites**.
- Use **Google-style operators (e.g., `site:`) to focus on trusted sources** if applicable.

---
## **Example:**
User Input: `"GenAI in healthcare"`
Generated Query:
"Generative AI use cases in healthcare applications and impact"

Generate search query for the below:
{topic}
"""

    search_prompt = search_instructions.format(topic=topic)
    search_query = structured_llm.invoke(search_prompt)

    exclude_domains = ["vktr.com"]
    # BUG FIX: the original called `tavily_search`, which is not defined in
    # this file, while the module-level `keyword_search` tool configured above
    # was never used. Use the locally configured tool.
    # TODO(review): confirm `tavily_search` was not intentionally supplied by
    # `setup` with a different configuration.
    search_docs = keyword_search.invoke(search_query.search_query, exclude_domains=exclude_domains)

    page_url = [doc['url'] for doc in search_docs]

    loader = WebBaseLoader(
        web_paths=page_url,
        bs_get_text_kwargs={"separator": "|", "strip": True},
        raise_for_status=True,
    )
    docs = loader.load()

    # BUG FIX: the original f-string nested single quotes inside a
    # single-quoted literal (doc.metadata['source']), a SyntaxError on
    # Python < 3.12. Use double quotes for the inner subscript.
    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document href="{doc.metadata["source"]}"/>\n{doc.page_content}\n</Document>'
        for doc in docs
    )

    summarization_prompt = '''You are an advanced summarization assistant. Your task is to generate a 500-word summary based on the provided context.
Maintain key information while removing redundancy, preserving critical details, and ensuring readability. Follow these guidelines:
Focus on Key Points: Extract essential facts, insights, and takeaways.
Maintain Clarity & Coherence: Ensure logical flow and readability.
Preserve Critical Data: Retain names, dates, figures, and important references.
Adjust Length as Needed: Summarize concisely while covering all vital aspects.
Format the summary professionally, adapting tone and detail to match the context.
context : {formatted_search_docs}
'''

    # BUG FIX: the prompt declares a {formatted_search_docs} placeholder but
    # was previously sent unformatted, so the model received the literal
    # placeholder text instead of the scraped documents.
    summarized_docs = llm2.invoke(
        [SystemMessage(content=summarization_prompt.format(formatted_search_docs=formatted_search_docs))]
    )

    return {'context': [summarized_docs]}
|
|
|
|
|
|
|
|
def generate_usecases(state: GeneratorState):
    """Consolidate the researched context into a structured use-case report.

    Reads ``state['topic']`` and the accumulated ``state['context']``, fills
    them into a report-writing prompt, and asks ``llm1`` for the final
    document.

    Returns:
        dict: ``{'messages': report_message}`` — appended to the state's
        message channel.
    """
    generation_prompt = '''
You are a highly skilled technical writer. Your task is to consolidate insights from analyst memos into a structured report based on the given context and topic. Ensure the report includes a **brief introduction**, at least five AI use cases, and a **short conclusion**. Follow this format:
topic : {topic}
context:
{context}

# Focus Title: [Provided Title]

## Introduction:
Provide a concise overview of the report's purpose and relevance.

## Use Case 1: [Descriptive Title]
**Objective/Use Case:** Summarize the goal in one or two sentences.
**AI Application:** Describe the AI technologies used.
**Cross-Functional Benefit:**
- **[Department]:** [Benefit]
- **[Department]:** [Benefit]

## Use Case 2: [Descriptive Title]
(Repeat format)

## Conclusion:
Summarize key takeaways and potential future implications.

Ensure clarity, relevance, and no duplicate citations in the **Sources** section. Extract insights accurately from the **context** provided.'''

    # Inject topic and context straight from state — no intermediate locals.
    filled_prompt = generation_prompt.format(
        topic=state['topic'],
        context=state['context'],
    )

    report_message = llm1.invoke([SystemMessage(content=filled_prompt)])
    return {'messages': report_message}
|
|
|
|
|
|
|
|
def graph(topic, max_analysts):
    """Build, run, and return the report from the use-case generation graph.

    Wires ``search_web`` → ``usecase_generation`` into a linear StateGraph,
    compiles it with an in-memory checkpointer, and runs it once.

    Args:
        topic: Industry/topic string to research.
        max_analysts: Upper bound stored in the state's ``max_usecase`` field.

    Returns:
        str: Content of the report message produced by ``generate_usecases``.
    """
    graph_builder = StateGraph(GeneratorState)

    graph_builder.add_node('search_web', search_web)
    graph_builder.add_node('usecase_generation', generate_usecases)

    graph_builder.add_edge(START, 'search_web')
    graph_builder.add_edge('search_web', 'usecase_generation')
    graph_builder.add_edge('usecase_generation', END)

    memory = MemorySaver()
    # Local renamed from `graph` — the original shadowed this function's name.
    compiled_graph = graph_builder.compile(checkpointer=memory)

    config = {"configurable": {"thread_id": "1"}}

    # BUG FIX: the original passed "max_analysts", which is not a key of
    # GeneratorState (the schema declares "max_usecase"), so the value was
    # silently dropped. Map it onto the declared key.
    # invoke() already returns the final state, so the get_state() round-trip
    # is unnecessary.
    final_state = compiled_graph.invoke(
        {"topic": topic, "max_usecase": max_analysts},
        config,
    )

    # Only generate_usecases writes to `messages`, so the last (and only)
    # message is the report.
    report = final_state['messages'][-1].content
    return report