from pydantic import BaseModel, Field
from typing import TypedDict, Annotated
from langgraph.graph import MessagesState, StateGraph, START, END
from langchain_community.tools import TavilySearchResults
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
from langchain_community.document_loaders import WebBaseLoader
from langgraph.checkpoint.memory import MemorySaver
import operator

from setup import *  # provides llm1, llm2 — TODO confirm exact exports


class GeneratorState(MessagesState):
    """Pipeline state shared across graph nodes."""
    context: Annotated[list, operator.add]  # retrieved/summarized docs (list-concat reducer)
    max_usecase: int                        # number of use cases to generate
    topic: str                              # input query


class SearchQuery(BaseModel):
    """Structured-output schema for the query-generation LLM call."""
    search_query: str = Field(description='Search query for web-search')


# Tavily web-search tool used by the search_web node.
keyword_search = TavilySearchResults(
    max_results=1,
    search_depth="advanced",
    include_answer=True,
    include_raw_content=True,
    include_images=True,
)


def search_web(state: GeneratorState):
    """Generate a web-search query for the topic, fetch the result pages,
    and return a 500-word LLM summary appended to the state's context.

    Returns:
        dict with key 'context': a one-element list holding the summary message.
    """
    topic = state['topic']
    structured_llm = llm2.with_structured_output(SearchQuery)

    search_instructions = """You are an AI assistant specialized in generating effective internet search queries. Your goal is to create **precise, keyword-rich search queries** that retrieve the best results for **AI use cases in specific industries** using Tavily Search.

## **Instructions:**
- Extract the **industry name** from the user’s query.
- Generate a **focused search query** that retrieves **practical AI use cases** in that industry.
- Include keywords like **"applications," "use cases," "impact," or "case studies"** to refine the search.
- Prioritize sources like **research papers, industry reports, and authoritative tech sites**.
- Use **Google-style operators (e.g., `site:`) to focus on trusted sources** if applicable.

---

## **Example:**
User Input: `"GenAI in healthcare"`
Generated Query: "Generative AI use cases in healthcare applications and impact"

Generate search query for the below:
{topic}
"""
    search_prompt = search_instructions.format(topic=topic)
    search_query = structured_llm.invoke(search_prompt)

    # BUG FIX: the original called undefined `tavily_search`; the tool instance
    # defined above is `keyword_search`.
    exclude_domains = ["vktr.com"]
    search_docs = keyword_search.invoke(
        search_query.search_query, exclude_domains=exclude_domains
    )

    page_url = [doc['url'] for doc in search_docs]
    loader = WebBaseLoader(
        web_paths=page_url,
        bs_get_text_kwargs={"separator": "|", "strip": True},
        raise_for_status=True,
    )
    docs = loader.load()

    formatted_search_docs = "\n\n---\n\n".join(
        [f'\n{doc.page_content}\n' for doc in docs]
    )

    summarization_prompt = '''You are an advanced summarization assistant. Your task is to generate a 500-word summary based on the provided context. Maintain key information while removing redundancy, preserving critical details, and ensuring readability. Follow these guidelines:

Focus on Key Points: Extract essential facts, insights, and takeaways.
Maintain Clarity & Coherence: Ensure logical flow and readability.
Preserve Critical Data: Retain names, dates, figures, and important references.
Adjust Length as Needed: Summarize concisely while covering all vital aspects.

Format the summary professionally, adapting tone and detail to match the context.

context : {formatted_search_docs}
'''
    # BUG FIX: the template was never formatted, so the LLM received the literal
    # "{formatted_search_docs}" placeholder instead of the retrieved documents.
    summarized_docs = llm2.invoke(
        [SystemMessage(content=summarization_prompt.format(
            formatted_search_docs=formatted_search_docs))]
    )
    return {'context': [summarized_docs]}


def generate_usecases(state: GeneratorState):
    """Consolidate the summarized context into a structured use-case report.

    Returns:
        dict with key 'messages': the AI message holding the report.
    """
    topic = state['topic']
    context = state['context']

    generation_prompt = '''
You are a highly skilled technical writer. Your task is to consolidate insights from analyst memos into a structured report based on the given context and topic. Ensure the report includes a **brief introduction**, at least five AI use cases, and a **short conclusion**.

Follow this format:

topic : {topic}
context: {context}

# Focus Title: [Provided Title]

## Introduction:
Provide a concise overview of the report's purpose and relevance.

## Use Case 1: [Descriptive Title]
**Objective/Use Case:** Summarize the goal in one or two sentences.
**AI Application:** Describe the AI technologies used.
### Cross-Functional Benefit:
- **[Department]:** [Benefit]
- **[Department]:** [Benefit]

## Use Case 2: [Descriptive Title]
(Repeat format)

## Conclusion:
Summarize key takeaways and potential future implications.

Ensure clarity, relevance, and no duplicate citations in the **Sources** section. Extract insights accurately from the **context** provided.'''
    system_message = generation_prompt.format(topic=topic, context=context)
    answer = llm1.invoke([SystemMessage(content=system_message)])
    return {'messages': answer}


def graph(topic, max_analysts):
    """Build, run, and read back the two-node search -> generation pipeline.

    Args:
        topic: the user query (e.g. "GenAI in healthcare").
        max_analysts: desired number of use cases; stored as `max_usecase`.

    Returns:
        The generated report text.
    """
    graph_builder = StateGraph(GeneratorState)
    graph_builder.add_node('search_web', search_web)
    graph_builder.add_node('usecase_generation', generate_usecases)
    graph_builder.add_edge(START, 'search_web')
    graph_builder.add_edge('search_web', 'usecase_generation')
    graph_builder.add_edge('usecase_generation', END)

    memory = MemorySaver()
    # Renamed local (was `graph`) to avoid shadowing this function's own name.
    compiled = graph_builder.compile(checkpointer=memory)

    config = {"configurable": {"thread_id": "1"}}
    # BUG FIX: the state schema declares `max_usecase`, not `max_analysts`;
    # the original key was silently dropped by LangGraph.
    compiled.invoke({"topic": topic, "max_usecase": max_analysts}, config)

    final_state = compiled.get_state(config)
    # Read the most recent message — the report appended by usecase_generation.
    report = final_state.values['messages'][-1].content
    return report