from pydantic import BaseModel, Field
from typing import TypedDict, Annotated
from langgraph.graph import MessagesState, StateGraph, START, END
from langchain_community.tools import TavilySearchResults
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
from langchain_community.document_loaders import WebBaseLoader
from langgraph.checkpoint.memory import MemorySaver
import operator

from setup import *  # provides llm1, llm2 — TODO confirm exact exports


class GeneratorState(MessagesState):
    """Pipeline state shared across graph nodes."""
    context: Annotated[list, operator.add]  # retrieved/summarized docs (list-concat reducer)
    max_usecase: int                        # number of use cases to generate
    topic: str                              # input query


class SearchQuery(BaseModel):
    """Structured-output schema for the query-generation LLM call."""
    search_query: str = Field(description='Search query for web-search')


# Tavily web-search tool used by the search_web node.
keyword_search = TavilySearchResults(
    max_results=1,
    search_depth="advanced",
    include_answer=True,
    include_raw_content=True,
    include_images=True,
)


def search_web(state: GeneratorState):
    """Generate a web-search query for the topic, fetch the result pages,
    and return a 500-word LLM summary appended to the state's context.

    Returns:
        dict with key 'context': a one-element list holding the summary message.
    """
    topic = state['topic']
    structured_llm = llm2.with_structured_output(SearchQuery)

    search_instructions = """You are an AI assistant specialized in generating effective internet search queries. Your goal is to create **precise, keyword-rich search queries** that retrieve the best results for **AI use cases in specific industries** using Tavily Search.

## **Instructions:**
- Extract the **industry name** from the user’s query.
- Generate a **focused search query** that retrieves **practical AI use cases** in that industry.
- Include keywords like **"applications," "use cases," "impact," or "case studies"** to refine the search.
- Prioritize sources like **research papers, industry reports, and authoritative tech sites**.
- Use **Google-style operators (e.g., `site:`) to focus on trusted sources** if applicable.

---

## **Example:**
User Input: `"GenAI in healthcare"`
Generated Query: "Generative AI use cases in healthcare applications and impact"

Generate search query for the below:
{topic}
"""
    search_prompt = search_instructions.format(topic=topic)
    search_query = structured_llm.invoke(search_prompt)

    # BUG FIX: the original called undefined `tavily_search`; the tool instance
    # defined above is `keyword_search`.
    exclude_domains = ["vktr.com"]
    search_docs = keyword_search.invoke(
        search_query.search_query, exclude_domains=exclude_domains
    )

    page_url = [doc['url'] for doc in search_docs]
    loader = WebBaseLoader(
        web_paths=page_url,
        bs_get_text_kwargs={"separator": "|", "strip": True},
        raise_for_status=True,
    )
    docs = loader.load()

    formatted_search_docs = "\n\n---\n\n".join(
        [f'\n{doc.page_content}\n' for doc in docs]
    )

    summarization_prompt = '''You are an advanced summarization assistant. Your task is to generate a 500-word summary based on the provided context. Maintain key information while removing redundancy, preserving critical details, and ensuring readability. Follow these guidelines:

Focus on Key Points: Extract essential facts, insights, and takeaways.
Maintain Clarity & Coherence: Ensure logical flow and readability.
Preserve Critical Data: Retain names, dates, figures, and important references.
Adjust Length as Needed: Summarize concisely while covering all vital aspects.

Format the summary professionally, adapting tone and detail to match the context.

context : {formatted_search_docs}
'''
    # BUG FIX: the template was never formatted, so the LLM received the literal
    # "{formatted_search_docs}" placeholder instead of the retrieved documents.
    summarized_docs = llm2.invoke(
        [SystemMessage(content=summarization_prompt.format(
            formatted_search_docs=formatted_search_docs))]
    )
    return {'context': [summarized_docs]}


def generate_usecases(state: GeneratorState):
    """Consolidate the summarized context into a structured use-case report.

    Returns:
        dict with key 'messages': the AI message holding the report.
    """
    topic = state['topic']
    context = state['context']

    generation_prompt = '''
You are a highly skilled technical writer. Your task is to consolidate insights from analyst memos into a structured report based on the given context and topic. Ensure the report includes a **brief introduction**, at least five AI use cases, and a **short conclusion**.

Follow this format:

topic : {topic}
context: {context}

# Focus Title: [Provided Title]

## Introduction:
Provide a concise overview of the report's purpose and relevance.

## Use Case 1: [Descriptive Title]
**Objective/Use Case:** Summarize the goal in one or two sentences.
**AI Application:** Describe the AI technologies used.
### Cross-Functional Benefit:
- **[Department]:** [Benefit]
- **[Department]:** [Benefit]

## Use Case 2: [Descriptive Title]
(Repeat format)

## Conclusion:
Summarize key takeaways and potential future implications.

Ensure clarity, relevance, and no duplicate citations in the **Sources** section. Extract insights accurately from the **context** provided.'''
    system_message = generation_prompt.format(topic=topic, context=context)
    answer = llm1.invoke([SystemMessage(content=system_message)])
    return {'messages': answer}


def graph(topic, max_analysts):
    """Build, run, and read back the two-node search -> generation pipeline.

    Args:
        topic: the user query (e.g. "GenAI in healthcare").
        max_analysts: desired number of use cases; stored as `max_usecase`.

    Returns:
        The generated report text.
    """
    graph_builder = StateGraph(GeneratorState)
    graph_builder.add_node('search_web', search_web)
    graph_builder.add_node('usecase_generation', generate_usecases)
    graph_builder.add_edge(START, 'search_web')
    graph_builder.add_edge('search_web', 'usecase_generation')
    graph_builder.add_edge('usecase_generation', END)

    memory = MemorySaver()
    # Renamed local (was `graph`) to avoid shadowing this function's own name.
    compiled = graph_builder.compile(checkpointer=memory)

    config = {"configurable": {"thread_id": "1"}}
    # BUG FIX: the state schema declares `max_usecase`, not `max_analysts`;
    # the original key was silently dropped by LangGraph.
    compiled.invoke({"topic": topic, "max_usecase": max_analysts}, config)

    final_state = compiled.get_state(config)
    # Read the most recent message — the report appended by usecase_generation.
    report = final_state.values['messages'][-1].content
    return report