Update short.py
Browse files
short.py
CHANGED
@@ -1,144 +1,144 @@
|
|
1 |
-
from pydantic import BaseModel,Field
|
2 |
-
from typing import TypedDict, Annotated
|
3 |
-
from langgraph.graph import MessagesState,StateGraph, START, END
|
4 |
-
from langchain_community.tools import TavilySearchResults
|
5 |
-
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
|
6 |
-
from langchain_community.document_loaders import WebBaseLoader
|
7 |
-
from langgraph.checkpoint.memory import MemorySaver
|
8 |
-
|
9 |
-
|
10 |
-
import operator
|
11 |
-
from setup import *
|
12 |
-
|
13 |
-
class GeneratorState(MessagesState):
|
14 |
-
context : Annotated[list, operator.add] # retrived docs
|
15 |
-
max_usecase : int # no of usecase to generate
|
16 |
-
topic : str # input query
|
17 |
-
|
18 |
-
|
19 |
-
class SearchQuery(BaseModel):
|
20 |
-
search_query : str = Field(description = 'Search query for web-search')
|
21 |
-
|
22 |
-
|
23 |
-
keyword_search = TavilySearchResults(
|
24 |
-
max_results=1,
|
25 |
-
search_depth="advanced",
|
26 |
-
include_answer=True,
|
27 |
-
include_raw_content=True,
|
28 |
-
include_images=True)
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
def search_web(state: GeneratorState):
|
33 |
-
|
34 |
-
topic = state['topic']
|
35 |
-
structured_llm = llm2.with_structured_output(SearchQuery)
|
36 |
-
|
37 |
-
search_instructions = """You are an AI assistant specialized in generating effective internet search queries. Your goal is to create **precise, keyword-rich search queries** that retrieve the best results for **AI use cases in specific industries** using Tavily Search.
|
38 |
-
|
39 |
-
## **Instructions:**
|
40 |
-
- Extract the **industry name** from the user’s query.
|
41 |
-
- Generate a **focused search query** that retrieves **practical AI use cases** in that industry.
|
42 |
-
- Include keywords like **"applications," "use cases," "impact," or "case studies"** to refine the search.
|
43 |
-
- Prioritize sources like **research papers, industry reports, and authoritative tech sites**.
|
44 |
-
- Use **Google-style operators (e.g., `site:`) to focus on trusted sources** if applicable.
|
45 |
-
|
46 |
-
---
|
47 |
-
## **Example:**
|
48 |
-
User Input: `"GenAI in healthcare"`
|
49 |
-
Generated Query:
|
50 |
-
"Generative AI use cases in healthcare applications and impact"
|
51 |
-
|
52 |
-
Generate search query for the below:
|
53 |
-
{topic}
|
54 |
-
"""
|
55 |
-
|
56 |
-
search_prompt = search_instructions.format(topic=topic)
|
57 |
-
search_query = structured_llm.invoke(search_prompt)
|
58 |
-
exclude_domains = ["vktr.com"]
|
59 |
-
search_docs = tavily_search.invoke(search_query.search_query, exclude_domains=exclude_domains)
|
60 |
-
page_url = [doc['url'] for doc in search_docs]
|
61 |
-
loader = WebBaseLoader(
|
62 |
-
web_paths= page_url,
|
63 |
-
bs_get_text_kwargs={"separator": "|", "strip": True},
|
64 |
-
raise_for_status=True,
|
65 |
-
)
|
66 |
-
docs = loader.load()
|
67 |
-
formatted_search_docs = "\n\n---\n\n".join(
|
68 |
-
[
|
69 |
-
f'<Document href="{doc.metadata[
|
70 |
-
for doc in docs
|
71 |
-
])
|
72 |
-
|
73 |
-
summarization_prompt = '''You are an advanced summarization assistant. Your task is to generate a 500-word summary based on the provided context.
|
74 |
-
Maintain key information while removing redundancy, preserving critical details, and ensuring readability. Follow these guidelines:
|
75 |
-
Focus on Key Points: Extract essential facts, insights, and takeaways.
|
76 |
-
Maintain Clarity & Coherence: Ensure logical flow and readability.
|
77 |
-
Preserve Critical Data: Retain names, dates, figures, and important references.
|
78 |
-
Adjust Length as Needed: Summarize concisely while covering all vital aspects.
|
79 |
-
Format the summary professionally, adapting tone and detail to match the context.
|
80 |
-
context : {formatted_search_docs}
|
81 |
-
'''
|
82 |
-
summarized_docs = llm2.invoke([SystemMessage(content=summarization_prompt)])
|
83 |
-
|
84 |
-
return {'context': [summarized_docs]}
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
def generate_usecases(state: GeneratorState):
|
89 |
-
topic = state['topic']
|
90 |
-
context = state['context']
|
91 |
-
generation_prompt = '''
|
92 |
-
You are a highly skilled technical writer. Your task is to consolidate insights from analyst memos into a structured report based on the given context and topic. Ensure the report includes a **brief introduction**, at least five AI use cases, and a **short conclusion**. Follow this format:
|
93 |
-
topic : {topic}
|
94 |
-
context:
|
95 |
-
{context}
|
96 |
-
|
97 |
-
# Focus Title: [Provided Title]
|
98 |
-
|
99 |
-
## Introduction:
|
100 |
-
Provide a concise overview of the report's purpose and relevance.
|
101 |
-
|
102 |
-
## Use Case 1: [Descriptive Title]
|
103 |
-
**Objective/Use Case:** Summarize the goal in one or two sentences.
|
104 |
-
**AI Application:** Describe the AI technologies used.
|
105 |
-
**Cross-Functional Benefit:**
|
106 |
-
- **[Department]:** [Benefit]
|
107 |
-
- **[Department]:** [Benefit]
|
108 |
-
|
109 |
-
## Use Case 2: [Descriptive Title]
|
110 |
-
(Repeat format)
|
111 |
-
|
112 |
-
## Conclusion:
|
113 |
-
Summarize key takeaways and potential future implications.
|
114 |
-
|
115 |
-
Ensure clarity, relevance, and no duplicate citations in the **Sources** section. Extract insights accurately from the **context** provided.'''
|
116 |
-
|
117 |
-
system_message = generation_prompt.format(topic=topic, context=context)
|
118 |
-
answer = llm1.invoke([SystemMessage(content=system_message)])
|
119 |
-
|
120 |
-
return {'messages': answer}
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
def graph(topic,max_analysts):
|
125 |
-
graph_builder = StateGraph(GeneratorState)
|
126 |
-
|
127 |
-
graph_builder.add_node('search_web', search_web)
|
128 |
-
graph_builder.add_node('usecase_generation', generate_usecases)
|
129 |
-
|
130 |
-
graph_builder.add_edge(START, 'search_web')
|
131 |
-
graph_builder.add_edge('search_web', 'usecase_generation')
|
132 |
-
graph_builder.add_edge('usecase_generation', END)
|
133 |
-
|
134 |
-
memory = MemorySaver()
|
135 |
-
graph = graph_builder.compile(checkpointer=memory)
|
136 |
-
config = {"configurable": {"thread_id": "1"}}
|
137 |
-
graph.invoke({"topic":topic,
|
138 |
-
"max_analysts":max_analysts},
|
139 |
-
config)
|
140 |
-
|
141 |
-
final_state = graph.get_state(config)
|
142 |
-
report = final_state.values['messages'][0].content
|
143 |
-
|
144 |
return report
|
|
|
1 |
+
from pydantic import BaseModel,Field
|
2 |
+
from typing import TypedDict, Annotated
|
3 |
+
from langgraph.graph import MessagesState,StateGraph, START, END
|
4 |
+
from langchain_community.tools import TavilySearchResults
|
5 |
+
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
|
6 |
+
from langchain_community.document_loaders import WebBaseLoader
|
7 |
+
from langgraph.checkpoint.memory import MemorySaver
|
8 |
+
|
9 |
+
|
10 |
+
import operator
|
11 |
+
from setup import *
|
12 |
+
|
13 |
+
class GeneratorState(MessagesState):
    """Shared LangGraph state: MessagesState's `messages` plus retrieval fields.

    `context` uses operator.add so lists returned by different nodes are
    concatenated rather than overwritten.
    """
    context : Annotated[list, operator.add]  # retrieved / summarized docs, accumulated across nodes
    max_usecase : int  # number of use cases to generate
    topic : str  # the user's input query
|
17 |
+
|
18 |
+
|
19 |
+
class SearchQuery(BaseModel):
    """Structured-output schema: the single web-search query the LLM must emit."""

    search_query: str = Field(description='Search query for web-search')
|
21 |
+
|
22 |
+
|
23 |
+
# Tavily search tool configured for one in-depth result per query.
# NOTE(review): this object is never referenced in this file — search_web
# calls `tavily_search` (presumably provided by `from setup import *`);
# confirm which of the two is intended.
keyword_search = TavilySearchResults(
    max_results=1,             # only the top hit
    search_depth="advanced",   # deeper crawl than the default "basic"
    include_answer=True,       # include Tavily's own short answer
    include_raw_content=True,  # include full page text per result
    include_images=True)       # include image URLs as well
|
29 |
+
|
30 |
+
|
31 |
+
|
32 |
+
def search_web(state: GeneratorState):
    """Search the web for the state's topic and return a summarized context.

    Pipeline:
      1. Ask llm2 (structured output) to turn ``state['topic']`` into a
         keyword-rich search query (SearchQuery schema).
      2. Run the query through the Tavily search tool.
      3. Fetch every result URL with WebBaseLoader and wrap each page in
         <Document> tags.
      4. Ask llm2 for a ~500-word summary of the combined documents.

    Returns:
        dict: ``{'context': [summary_message]}`` — merged into the shared
        state via ``operator.add`` on the ``context`` field.
    """
    topic = state['topic']
    structured_llm = llm2.with_structured_output(SearchQuery)

    search_instructions = """You are an AI assistant specialized in generating effective internet search queries. Your goal is to create **precise, keyword-rich search queries** that retrieve the best results for **AI use cases in specific industries** using Tavily Search.

## **Instructions:**
- Extract the **industry name** from the user’s query.
- Generate a **focused search query** that retrieves **practical AI use cases** in that industry.
- Include keywords like **"applications," "use cases," "impact," or "case studies"** to refine the search.
- Prioritize sources like **research papers, industry reports, and authoritative tech sites**.
- Use **Google-style operators (e.g., `site:`) to focus on trusted sources** if applicable.

---
## **Example:**
User Input: `"GenAI in healthcare"`
Generated Query:
"Generative AI use cases in healthcare applications and impact"

Generate search query for the below:
{topic}
"""

    search_prompt = search_instructions.format(topic=topic)
    search_query = structured_llm.invoke(search_prompt)

    exclude_domains = ["vktr.com"]
    # NOTE(review): module-level `keyword_search` is configured but unused;
    # `tavily_search` must come from `setup` (star import) — confirm.
    search_docs = tavily_search.invoke(search_query.search_query, exclude_domains=exclude_domains)

    page_url = [doc['url'] for doc in search_docs]
    loader = WebBaseLoader(
        web_paths=page_url,
        bs_get_text_kwargs={"separator": "|", "strip": True},
        raise_for_status=True,
    )
    docs = loader.load()
    # str.format instead of a same-quote nested f-string, which is a
    # SyntaxError on Python < 3.12.
    formatted_search_docs = "\n\n---\n\n".join(
        '<Document href="{}"/>\n{}\n</Document>'.format(doc.metadata["source"], doc.page_content)
        for doc in docs
    )

    summarization_prompt = '''You are an advanced summarization assistant. Your task is to generate a 500-word summary based on the provided context.
Maintain key information while removing redundancy, preserving critical details, and ensuring readability. Follow these guidelines:
Focus on Key Points: Extract essential facts, insights, and takeaways.
Maintain Clarity & Coherence: Ensure logical flow and readability.
Preserve Critical Data: Retain names, dates, figures, and important references.
Adjust Length as Needed: Summarize concisely while covering all vital aspects.
Format the summary professionally, adapting tone and detail to match the context.
context : {formatted_search_docs}
'''
    # BUG FIX: the template was previously sent to the LLM unformatted, so the
    # retrieved documents never reached the model. Substitute them in first.
    summarized_docs = llm2.invoke([
        SystemMessage(content=summarization_prompt.format(formatted_search_docs=formatted_search_docs))
    ])

    return {'context': [summarized_docs]}
|
85 |
+
|
86 |
+
|
87 |
+
|
88 |
+
def generate_usecases(state: GeneratorState):
    """Consolidate the accumulated context into a structured use-case report.

    Fills the report template with the state's topic and context, sends it
    to llm1 as a system message, and returns the model's reply so that
    MessagesState appends it to ``messages``.
    """
    report_template = '''
You are a highly skilled technical writer. Your task is to consolidate insights from analyst memos into a structured report based on the given context and topic. Ensure the report includes a **brief introduction**, at least five AI use cases, and a **short conclusion**. Follow this format:
topic : {topic}
context:
{context}

# Focus Title: [Provided Title]

## Introduction:
Provide a concise overview of the report's purpose and relevance.

## Use Case 1: [Descriptive Title]
**Objective/Use Case:** Summarize the goal in one or two sentences.
**AI Application:** Describe the AI technologies used.
**Cross-Functional Benefit:**
- **[Department]:** [Benefit]
- **[Department]:** [Benefit]

## Use Case 2: [Descriptive Title]
(Repeat format)

## Conclusion:
Summarize key takeaways and potential future implications.

Ensure clarity, relevance, and no duplicate citations in the **Sources** section. Extract insights accurately from the **context** provided.'''

    prompt = report_template.format(
        topic=state['topic'],
        context=state['context'],
    )
    report = llm1.invoke([SystemMessage(content=prompt)])
    return {'messages': report}
|
121 |
+
|
122 |
+
|
123 |
+
|
124 |
+
def graph(topic, max_analysts):
    """Build and run the two-node search → generation pipeline.

    Args:
        topic: the user query, e.g. "GenAI in healthcare".
        max_analysts: number of use cases requested; stored in the state's
            ``max_usecase`` field.

    Returns:
        str: the generated report (content of the final message).
    """
    graph_builder = StateGraph(GeneratorState)

    graph_builder.add_node('search_web', search_web)
    graph_builder.add_node('usecase_generation', generate_usecases)

    graph_builder.add_edge(START, 'search_web')
    graph_builder.add_edge('search_web', 'usecase_generation')
    graph_builder.add_edge('usecase_generation', END)

    memory = MemorySaver()
    # `compiled`, not `graph`, to avoid shadowing this function's own name.
    compiled = graph_builder.compile(checkpointer=memory)
    config = {"configurable": {"thread_id": "1"}}

    # BUG FIX: the input was previously keyed "max_analysts", which is not a
    # field of GeneratorState, so the requested count never reached the state.
    compiled.invoke({"topic": topic, "max_usecase": max_analysts}, config)

    final_state = compiled.get_state(config)
    # Take the last message — robust even if earlier nodes start emitting
    # messages of their own.
    report = final_state.values['messages'][-1].content

    return report
|