File size: 5,529 Bytes
cdefda1 d647c7e cdefda1 01ade1d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
from pydantic import BaseModel,Field
from typing import TypedDict, Annotated
from langgraph.graph import MessagesState,StateGraph, START, END
from langchain_community.tools import TavilySearchResults
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
from langchain_community.document_loaders import WebBaseLoader
from langgraph.checkpoint.memory import MemorySaver
import operator
from setup import *
class GeneratorState(MessagesState):
    """Shared graph state for the use-case generator workflow.

    Extends ``MessagesState`` (which already carries the ``messages`` list)
    with the fields below.
    """

    # Retrieved/summarized documents. The operator.add reducer makes
    # langgraph concatenate node outputs into this list instead of
    # overwriting it.
    context : Annotated[list, operator.add] # retrived docs
    # Number of use cases to generate.
    # NOTE(review): set by callers but not read by any node visible in this
    # file — confirm it is consumed elsewhere.
    max_usecase : int # no of usecase to generate
    # The user's input query (industry/topic), e.g. "GenAI in healthcare".
    topic : str # input query
class SearchQuery(BaseModel):
    """Structured-output schema: the single web-search query string the
    LLM is asked to produce (used with ``with_structured_output``)."""

    # The generated query text that will be sent to the search tool.
    search_query : str = Field(description = 'Search query for web-search')
# Tavily web-search tool used by the search node.
# Configured for one enriched hit per query: advanced-depth search that
# also returns the answer snippet, raw page content, and images.
keyword_search = TavilySearchResults(
    include_answer=True,
    include_raw_content=True,
    include_images=True,
    search_depth="advanced",
    max_results=1,
)
def search_web(state: GeneratorState):
    """Search the web for AI use cases on ``state['topic']``, load the hit
    pages, and summarize them with the LLM.

    Pipeline: LLM crafts a focused query (structured output) -> Tavily
    search -> ``WebBaseLoader`` fetches the pages -> LLM writes a ~500-word
    summary of the fetched content.

    Returns:
        dict: ``{'context': [summary_message]}`` — merged into graph state
        via the ``operator.add`` reducer on ``context``.
    """
    topic = state['topic']

    # Ask the LLM for a keyword-rich search query as structured output.
    structured_llm = llm2.with_structured_output(SearchQuery)
    search_instructions = """You are an AI assistant specialized in generating effective internet search queries. Your goal is to create **precise, keyword-rich search queries** that retrieve the best results for **AI use cases in specific industries** using Tavily Search.
## **Instructions:**
- Extract the **industry name** from the user’s query.
- Generate a **focused search query** that retrieves **practical AI use cases** in that industry.
- Include keywords like **"applications," "use cases," "impact," or "case studies"** to refine the search.
- Prioritize sources like **research papers, industry reports, and authoritative tech sites**.
- Use **Google-style operators (e.g., `site:`) to focus on trusted sources** if applicable.
---
## **Example:**
User Input: `"GenAI in healthcare"`
Generated Query:
"Generative AI use cases in healthcare applications and impact"
Generate search query for the below:
{topic}
"""
    search_prompt = search_instructions.format(topic=topic)
    search_query = structured_llm.invoke(search_prompt)

    # BUG FIX: this previously invoked `tavily_search`, but the tool
    # configured in this module is `keyword_search` (which was never used).
    exclude_domains = ["vktr.com"]
    search_docs = keyword_search.invoke(
        search_query.search_query, exclude_domains=exclude_domains
    )

    # Fetch the full pages behind the search hits.
    page_url = [doc['url'] for doc in search_docs]
    loader = WebBaseLoader(
        web_paths=page_url,
        bs_get_text_kwargs={"separator": "|", "strip": True},
        raise_for_status=True,
    )
    docs = loader.load()
    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document href="{doc.metadata["source"]}"/>\n{doc.page_content}\n</Document>'
        for doc in docs
    )

    summarization_prompt = '''You are an advanced summarization assistant. Your task is to generate a 500-word summary based on the provided context.
Maintain key information while removing redundancy, preserving critical details, and ensuring readability. Follow these guidelines:
Focus on Key Points: Extract essential facts, insights, and takeaways.
Maintain Clarity & Coherence: Ensure logical flow and readability.
Preserve Critical Data: Retain names, dates, figures, and important references.
Adjust Length as Needed: Summarize concisely while covering all vital aspects.
Format the summary professionally, adapting tone and detail to match the context.
context : {formatted_search_docs}
'''
    # BUG FIX: the template was previously sent un-formatted, so the LLM
    # never saw the retrieved documents; substitute them into the prompt.
    summarized_docs = llm2.invoke([
        SystemMessage(
            content=summarization_prompt.format(
                formatted_search_docs=formatted_search_docs
            )
        )
    ])
    return {'context': [summarized_docs]}
def generate_usecases(state: GeneratorState):
    """Consolidate the gathered ``context`` into a structured use-case
    report for ``topic`` and return it as a new message.

    Returns:
        dict: ``{'messages': report_message}`` appended to graph state.
    """
    generation_prompt = '''
You are a highly skilled technical writer. Your task is to consolidate insights from analyst memos into a structured report based on the given context and topic. Ensure the report includes a **brief introduction**, at least five AI use cases, and a **short conclusion**. Follow this format:
topic : {topic}
context:
{context}
# Focus Title: [Provided Title]
## Introduction:
Provide a concise overview of the report's purpose and relevance.
## Use Case 1: [Descriptive Title]
**Objective/Use Case:** Summarize the goal in one or two sentences.
**AI Application:** Describe the AI technologies used.
### Cross-Functional Benefit:
- **[Department]:** [Benefit]
- **[Department]:** [Benefit]
## Use Case 2: [Descriptive Title]
(Repeat format)
## Conclusion:
Summarize key takeaways and potential future implications.
Ensure clarity, relevance, and no duplicate citations in the **Sources** section. Extract insights accurately from the **context** provided.'''

    # Fill the template straight from state and ask the report model (llm1)
    # for the final document.
    filled_prompt = generation_prompt.format(
        topic=state['topic'], context=state['context']
    )
    report_message = llm1.invoke([SystemMessage(content=filled_prompt)])
    return {'messages': report_message}
def graph(topic, max_analysts):
    """Build and run the search -> generation pipeline and return the report.

    Args:
        topic: Industry/topic query, e.g. "GenAI in healthcare".
        max_analysts: Requested number of use cases; stored in state under
            the schema's ``max_usecase`` field.

    Returns:
        str: text content of the generated report message.
    """
    graph_builder = StateGraph(GeneratorState)
    graph_builder.add_node('search_web', search_web)
    graph_builder.add_node('usecase_generation', generate_usecases)
    graph_builder.add_edge(START, 'search_web')
    graph_builder.add_edge('search_web', 'usecase_generation')
    graph_builder.add_edge('usecase_generation', END)

    # In-memory checkpointer so the final state can be read back via config.
    memory = MemorySaver()
    compiled = graph_builder.compile(checkpointer=memory)
    config = {"configurable": {"thread_id": "1"}}

    # BUG FIX: GeneratorState declares `max_usecase`; the old code passed
    # the unknown key `max_analysts`, so the value never entered the state.
    compiled.invoke({"topic": topic, "max_usecase": max_analysts}, config)

    final_state = compiled.get_state(config)
    # Take the last message so the report is found even if earlier nodes
    # also append messages.
    report = final_state.values['messages'][-1].content
    return report