Dharma20 commited on
Commit
cdefda1
·
verified ·
1 Parent(s): 7763c11

Update short.py

Browse files
Files changed (1) hide show
  1. short.py +143 -143
short.py CHANGED
@@ -1,144 +1,144 @@
1
- from pydantic import BaseModel,Field
2
- from typing import TypedDict, Annotated
3
- from langgraph.graph import MessagesState,StateGraph, START, END
4
- from langchain_community.tools import TavilySearchResults
5
- from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
6
- from langchain_community.document_loaders import WebBaseLoader
7
- from langgraph.checkpoint.memory import MemorySaver
8
-
9
-
10
- import operator
11
- from setup import *
12
-
13
- class GeneratorState(MessagesState):
14
- context : Annotated[list, operator.add] # retrived docs
15
- max_usecase : int # no of usecase to generate
16
- topic : str # input query
17
-
18
-
19
- class SearchQuery(BaseModel):
20
- search_query : str = Field(description = 'Search query for web-search')
21
-
22
-
23
- keyword_search = TavilySearchResults(
24
- max_results=1,
25
- search_depth="advanced",
26
- include_answer=True,
27
- include_raw_content=True,
28
- include_images=True)
29
-
30
-
31
-
32
- def search_web(state: GeneratorState):
33
-
34
- topic = state['topic']
35
- structured_llm = llm2.with_structured_output(SearchQuery)
36
-
37
- search_instructions = """You are an AI assistant specialized in generating effective internet search queries. Your goal is to create **precise, keyword-rich search queries** that retrieve the best results for **AI use cases in specific industries** using Tavily Search.
38
-
39
- ## **Instructions:**
40
- - Extract the **industry name** from the user’s query.
41
- - Generate a **focused search query** that retrieves **practical AI use cases** in that industry.
42
- - Include keywords like **"applications," "use cases," "impact," or "case studies"** to refine the search.
43
- - Prioritize sources like **research papers, industry reports, and authoritative tech sites**.
44
- - Use **Google-style operators (e.g., `site:`) to focus on trusted sources** if applicable.
45
-
46
- ---
47
- ## **Example:**
48
- User Input: `"GenAI in healthcare"`
49
- Generated Query:
50
- "Generative AI use cases in healthcare applications and impact"
51
-
52
- Generate search query for the below:
53
- {topic}
54
- """
55
-
56
- search_prompt = search_instructions.format(topic=topic)
57
- search_query = structured_llm.invoke(search_prompt)
58
- exclude_domains = ["vktr.com"]
59
- search_docs = tavily_search.invoke(search_query.search_query, exclude_domains=exclude_domains)
60
- page_url = [doc['url'] for doc in search_docs]
61
- loader = WebBaseLoader(
62
- web_paths= page_url,
63
- bs_get_text_kwargs={"separator": "|", "strip": True},
64
- raise_for_status=True,
65
- )
66
- docs = loader.load()
67
- formatted_search_docs = "\n\n---\n\n".join(
68
- [
69
- f'<Document href="{doc.metadata['source']}"/>\n{doc.page_content}\n</Document>'
70
- for doc in docs
71
- ])
72
-
73
- summarization_prompt = '''You are an advanced summarization assistant. Your task is to generate a 500-word summary based on the provided context.
74
- Maintain key information while removing redundancy, preserving critical details, and ensuring readability. Follow these guidelines:
75
- Focus on Key Points: Extract essential facts, insights, and takeaways.
76
- Maintain Clarity & Coherence: Ensure logical flow and readability.
77
- Preserve Critical Data: Retain names, dates, figures, and important references.
78
- Adjust Length as Needed: Summarize concisely while covering all vital aspects.
79
- Format the summary professionally, adapting tone and detail to match the context.
80
- context : {formatted_search_docs}
81
- '''
82
- summarized_docs = llm2.invoke([SystemMessage(content=summarization_prompt)])
83
-
84
- return {'context': [summarized_docs]}
85
-
86
-
87
-
88
- def generate_usecases(state: GeneratorState):
89
- topic = state['topic']
90
- context = state['context']
91
- generation_prompt = '''
92
- You are a highly skilled technical writer. Your task is to consolidate insights from analyst memos into a structured report based on the given context and topic. Ensure the report includes a **brief introduction**, at least five AI use cases, and a **short conclusion**. Follow this format:
93
- topic : {topic}
94
- context:
95
- {context}
96
-
97
- # Focus Title: [Provided Title]
98
-
99
- ## Introduction:
100
- Provide a concise overview of the report's purpose and relevance.
101
-
102
- ## Use Case 1: [Descriptive Title]
103
- **Objective/Use Case:** Summarize the goal in one or two sentences.
104
- **AI Application:** Describe the AI technologies used.
105
- **Cross-Functional Benefit:**
106
- - **[Department]:** [Benefit]
107
- - **[Department]:** [Benefit]
108
-
109
- ## Use Case 2: [Descriptive Title]
110
- (Repeat format)
111
-
112
- ## Conclusion:
113
- Summarize key takeaways and potential future implications.
114
-
115
- Ensure clarity, relevance, and no duplicate citations in the **Sources** section. Extract insights accurately from the **context** provided.'''
116
-
117
- system_message = generation_prompt.format(topic=topic, context=context)
118
- answer = llm1.invoke([SystemMessage(content=system_message)])
119
-
120
- return {'messages': answer}
121
-
122
-
123
-
124
- def graph(topic,max_analysts):
125
- graph_builder = StateGraph(GeneratorState)
126
-
127
- graph_builder.add_node('search_web', search_web)
128
- graph_builder.add_node('usecase_generation', generate_usecases)
129
-
130
- graph_builder.add_edge(START, 'search_web')
131
- graph_builder.add_edge('search_web', 'usecase_generation')
132
- graph_builder.add_edge('usecase_generation', END)
133
-
134
- memory = MemorySaver()
135
- graph = graph_builder.compile(checkpointer=memory)
136
- config = {"configurable": {"thread_id": "1"}}
137
- graph.invoke({"topic":topic,
138
- "max_analysts":max_analysts},
139
- config)
140
-
141
- final_state = graph.get_state(config)
142
- report = final_state.values['messages'][0].content
143
-
144
  return report
 
1
+ from pydantic import BaseModel,Field
2
+ from typing import TypedDict, Annotated
3
+ from langgraph.graph import MessagesState,StateGraph, START, END
4
+ from langchain_community.tools import TavilySearchResults
5
+ from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
6
+ from langchain_community.document_loaders import WebBaseLoader
7
+ from langgraph.checkpoint.memory import MemorySaver
8
+
9
+
10
+ import operator
11
+ from setup import *
12
+
13
class GeneratorState(MessagesState):
    """Graph state: input topic, use-case cap, and accumulated web context."""
    context: Annotated[list, operator.add]  # retrieved/summarized docs; lists are concatenated across nodes
    max_usecase: int  # number of use cases to generate
    topic: str  # the user's input query
17
+
18
+
19
class SearchQuery(BaseModel):
    """Structured-output schema the query-generation LLM must return."""
    search_query: str = Field(description='Search query for web-search')
21
+
22
+
23
# Tavily search tool configured for a single, in-depth result with answer,
# raw page content, and images included.
# NOTE(review): this instance is never used below — the nodes call
# `tavily_search` (presumably exported by `setup`); confirm which is intended.
keyword_search = TavilySearchResults(
    max_results=1,
    search_depth="advanced",
    include_answer=True,
    include_raw_content=True,
    include_images=True,
)
29
+
30
+
31
+
32
def search_web(state: GeneratorState):
    """Search the web for the state's topic and append an LLM summary to `context`.

    Steps: ask `llm2` (structured output) for a focused search query, run it
    through Tavily, download each result page with WebBaseLoader, then have
    `llm2` produce a ~500-word summary of the combined page text.

    Returns:
        dict with a one-element ``context`` list holding the summary message
        (merged into state via ``operator.add``).
    """
    topic = state['topic']
    structured_llm = llm2.with_structured_output(SearchQuery)

    search_instructions = """You are an AI assistant specialized in generating effective internet search queries. Your goal is to create **precise, keyword-rich search queries** that retrieve the best results for **AI use cases in specific industries** using Tavily Search.

## **Instructions:**
- Extract the **industry name** from the user’s query.
- Generate a **focused search query** that retrieves **practical AI use cases** in that industry.
- Include keywords like **"applications," "use cases," "impact," or "case studies"** to refine the search.
- Prioritize sources like **research papers, industry reports, and authoritative tech sites**.
- Use **Google-style operators (e.g., `site:`) to focus on trusted sources** if applicable.

---
## **Example:**
User Input: `"GenAI in healthcare"`
Generated Query:
"Generative AI use cases in healthcare applications and impact"

Generate search query for the below:
{topic}
"""

    search_prompt = search_instructions.format(topic=topic)
    search_query = structured_llm.invoke(search_prompt)

    # NOTE(review): `tavily_search` presumably comes from `setup` via the star
    # import; the module-level `keyword_search` tool is unused — confirm.
    exclude_domains = ["vktr.com"]
    search_docs = tavily_search.invoke(search_query.search_query, exclude_domains=exclude_domains)

    # Load the full text of every returned result URL.
    page_url = [doc['url'] for doc in search_docs]
    loader = WebBaseLoader(
        web_paths=page_url,
        bs_get_text_kwargs={"separator": "|", "strip": True},
        raise_for_status=True,
    )
    docs = loader.load()
    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document href="{doc.metadata["source"]}"/>\n{doc.page_content}\n</Document>'
        for doc in docs
    )

    summarization_prompt = '''You are an advanced summarization assistant. Your task is to generate a 500-word summary based on the provided context.
Maintain key information while removing redundancy, preserving critical details, and ensuring readability. Follow these guidelines:
Focus on Key Points: Extract essential facts, insights, and takeaways.
Maintain Clarity & Coherence: Ensure logical flow and readability.
Preserve Critical Data: Retain names, dates, figures, and important references.
Adjust Length as Needed: Summarize concisely while covering all vital aspects.
Format the summary professionally, adapting tone and detail to match the context.
context : {formatted_search_docs}
'''
    # BUG FIX: the prompt's {formatted_search_docs} placeholder was never
    # substituted, so the LLM was asked to summarize the literal placeholder
    # text instead of the retrieved documents.
    summarized_docs = llm2.invoke(
        [SystemMessage(content=summarization_prompt.format(formatted_search_docs=formatted_search_docs))]
    )

    return {'context': [summarized_docs]}
85
+
86
+
87
+
88
def generate_usecases(state: GeneratorState):
    """Turn the collected context into a structured AI use-case report.

    Formats the report-writing prompt with the state's topic and context and
    asks `llm1` for the final markdown report; the answer is appended to the
    graph's message history.
    """
    generation_prompt = '''
You are a highly skilled technical writer. Your task is to consolidate insights from analyst memos into a structured report based on the given context and topic. Ensure the report includes a **brief introduction**, at least five AI use cases, and a **short conclusion**. Follow this format:
topic : {topic}
context:
{context}

# Focus Title: [Provided Title]

## Introduction:
Provide a concise overview of the report's purpose and relevance.

## Use Case 1: [Descriptive Title]
**Objective/Use Case:** Summarize the goal in one or two sentences.
**AI Application:** Describe the AI technologies used.
**Cross-Functional Benefit:**
- **[Department]:** [Benefit]
- **[Department]:** [Benefit]

## Use Case 2: [Descriptive Title]
(Repeat format)

## Conclusion:
Summarize key takeaways and potential future implications.

Ensure clarity, relevance, and no duplicate citations in the **Sources** section. Extract insights accurately from the **context** provided.'''

    system_message = generation_prompt.format(topic=state['topic'], context=state['context'])
    answer = llm1.invoke([SystemMessage(content=system_message)])
    return {'messages': answer}
121
+
122
+
123
+
124
def graph(topic, max_analysts):
    """Build and run the two-node search -> report graph; return the report text.

    Args:
        topic: the user's input query.
        max_analysts: cap on generated use cases (stored in state as
            ``max_usecase``, the field GeneratorState actually declares).

    Returns:
        The content of the report message produced by `generate_usecases`.
    """
    graph_builder = StateGraph(GeneratorState)

    graph_builder.add_node('search_web', search_web)
    graph_builder.add_node('usecase_generation', generate_usecases)

    graph_builder.add_edge(START, 'search_web')
    graph_builder.add_edge('search_web', 'usecase_generation')
    graph_builder.add_edge('usecase_generation', END)

    memory = MemorySaver()
    # Renamed local (was `graph`) to stop shadowing this function's own name.
    compiled = graph_builder.compile(checkpointer=memory)
    config = {"configurable": {"thread_id": "1"}}
    # BUG FIX: the state schema declares `max_usecase`, not `max_analysts`;
    # the old key was silently dropped, so the cap never reached the graph.
    compiled.invoke({"topic": topic, "max_usecase": max_analysts}, config)

    final_state = compiled.get_state(config)
    # The report is the last message appended by usecase_generation; [-1] is
    # robust even if the checkpointed thread already holds earlier messages.
    report = final_state.values['messages'][-1].content
    return report