File size: 5,529 Bytes
cdefda1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d647c7e
 
cdefda1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
01ade1d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
from pydantic import BaseModel,Field
from typing import TypedDict, Annotated
from langgraph.graph import MessagesState,StateGraph, START, END
from langchain_community.tools import TavilySearchResults
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
from langchain_community.document_loaders import WebBaseLoader
from langgraph.checkpoint.memory import MemorySaver


import operator
from setup import *

class GeneratorState(MessagesState):
  # Shared graph state; extends MessagesState, which contributes `messages`.
  context : Annotated[list, operator.add] # retrieved/summarized docs; operator.add merges node updates by list-concat
  max_usecase : int # no of usecase to generate; NOTE(review): never written — graph() passes "max_analysts" instead, confirm intent
  topic : str # input query (e.g. "GenAI in healthcare")


class SearchQuery(BaseModel):
  """Structured-output schema for the query-generation LLM: one search string."""
  search_query : str = Field(description = 'Search query for web-search')


# Tavily web-search tool configured for a single, in-depth result.
# NOTE(review): this instance appears unused — search_web() calls `tavily_search`,
# which is not defined in this file (presumably imported via `from setup import *`).
# Confirm which instance is intended before removing either.
keyword_search = TavilySearchResults(
  max_results=1,
  search_depth="advanced",
  include_answer=True,
  include_raw_content=True,
  include_images=True)



def search_web(state: GeneratorState):
  """Graph node: research the topic on the web and summarize the findings.

  Steps: (1) ask llm2 for a structured search query for state['topic'],
  (2) run it through Tavily, (3) load the result pages with WebBaseLoader,
  (4) summarize the combined page text with llm2.

  Returns:
    dict with key 'context': a one-element list containing the summary
    message (merged into state via the operator.add annotation).
  """
  topic = state['topic']
  # Constrain the LLM output to the SearchQuery schema.
  structured_llm = llm2.with_structured_output(SearchQuery)
  
  search_instructions = """You are an AI assistant specialized in generating effective internet search queries. Your goal is to create **precise, keyword-rich search queries** that retrieve the best results for **AI use cases in specific industries** using Tavily Search.  

    ## **Instructions:**
    - Extract the **industry name** from the user’s query.
    - Generate a **focused search query** that retrieves **practical AI use cases** in that industry.
    - Include keywords like **"applications," "use cases," "impact," or "case studies"** to refine the search.
    - Prioritize sources like **research papers, industry reports, and authoritative tech sites**.
    - Use **Google-style operators (e.g., `site:`) to focus on trusted sources** if applicable.

    ---
    ## **Example:**
    User Input: `"GenAI in healthcare"`  
    Generated Query:  
    "Generative AI use cases in healthcare applications and impact"

    Generate search query for the below:
    {topic}
    """

  search_prompt = search_instructions.format(topic=topic)
  search_query = structured_llm.invoke(search_prompt)
  exclude_domains = ["vktr.com"]
  # NOTE(review): `tavily_search` is not defined in this file (presumably from
  # `setup`); the locally configured `keyword_search` above is unused — confirm.
  search_docs = tavily_search.invoke(search_query.search_query, exclude_domains=exclude_domains)
  page_url = [doc['url'] for doc in search_docs]
  loader = WebBaseLoader(
    web_paths= page_url,
    bs_get_text_kwargs={"separator": "|", "strip": True},
    raise_for_status=True,
  )
  docs = loader.load()
  # Wrap each page in a <Document href=...> envelope so the summarizer can cite sources.
  formatted_search_docs = "\n\n---\n\n".join(
    [
      f'<Document href="{doc.metadata["source"]}"/>\n{doc.page_content}\n</Document>'
      for doc in docs
    ])
  
  summarization_prompt = '''You are an advanced summarization assistant. Your task is to generate a 500-word summary based on the provided context. 
    Maintain key information while removing redundancy, preserving critical details, and ensuring readability. Follow these guidelines:
    Focus on Key Points: Extract essential facts, insights, and takeaways.
    Maintain Clarity & Coherence: Ensure logical flow and readability.
    Preserve Critical Data: Retain names, dates, figures, and important references.
    Adjust Length as Needed: Summarize concisely while covering all vital aspects.
    Format the summary professionally, adapting tone and detail to match the context.
    context : {formatted_search_docs}
  '''
  # BUG FIX: the template was previously sent unformatted, so the model received
  # the literal "{formatted_search_docs}" placeholder and never saw the pages.
  summarized_docs = llm2.invoke(
    [SystemMessage(content=summarization_prompt.format(formatted_search_docs=formatted_search_docs))])

  return {'context': [summarized_docs]}



def generate_usecases(state: GeneratorState):
  """Graph node: consolidate the researched context into a use-case report.

  Fills the report-writing prompt with the topic and accumulated context,
  invokes llm1, and returns the model's answer as a new message.
  """
  generation_prompt = '''
    You are a highly skilled technical writer. Your task is to consolidate insights from analyst memos into a structured report based on the given context and topic. Ensure the report includes a **brief introduction**, at least five AI use cases, and a **short conclusion**. Follow this format:  
    topic : {topic}
    context:
    {context}

    # Focus Title: [Provided Title]  

    ## Introduction:
      Provide a concise overview of the report's purpose and relevance.  

    ## Use Case 1: [Descriptive Title]  
    **Objective/Use Case:** Summarize the goal in one or two sentences.  
    **AI Application:** Describe the AI technologies used.  
    
    ### Cross-Functional Benefit:
    - **[Department]:** [Benefit]  
    - **[Department]:** [Benefit]  

    ## Use Case 2: [Descriptive Title]  
    (Repeat format)  

    ## Conclusion:
      Summarize key takeaways and potential future implications.  

    Ensure clarity, relevance, and no duplicate citations in the **Sources** section. Extract insights accurately from the **context** provided.'''

  # Substitute the topic and retrieved context straight from state.
  filled_prompt = generation_prompt.format(
      topic=state['topic'], context=state['context'])
  response = llm1.invoke([SystemMessage(content=filled_prompt)])

  return {'messages': response}



def graph(topic, max_analysts):
  """Build and run the two-node pipeline (web search -> report generation).

  Args:
    topic: the user's query, e.g. "GenAI in healthcare".
    max_analysts: number of use cases to target; stored in state as
      `max_usecase` (the channel declared on GeneratorState).

  Returns:
    The generated report text (content of the final AI message).
  """
  graph_builder = StateGraph(GeneratorState)

  graph_builder.add_node('search_web', search_web)
  graph_builder.add_node('usecase_generation', generate_usecases)

  graph_builder.add_edge(START, 'search_web')
  graph_builder.add_edge('search_web', 'usecase_generation')
  graph_builder.add_edge('usecase_generation', END)

  memory = MemorySaver()
  # Renamed from `graph` to avoid shadowing this function's own name.
  compiled = graph_builder.compile(checkpointer=memory)
  config = {"configurable": {"thread_id": "1"}}
  # BUG FIX: the state schema declares `max_usecase`, not `max_analysts`;
  # LangGraph rejects updates to undeclared channels, so map the argument
  # onto the declared key.
  compiled.invoke({"topic": topic,
                   "max_usecase": max_analysts},
                  config)

  final_state = compiled.get_state(config)
  # Read the most recent message — the generated report — rather than index 0,
  # so this keeps working if earlier nodes ever append messages.
  report = final_state.values['messages'][-1].content

  return report