ankanghosh committed
Commit 8fd972b · verified · 1 Parent(s): 5c62606

Delete agent.py

Files changed (1)
  1. agent.py +0 -430
agent.py DELETED
@@ -1,430 +0,0 @@
"""
Agent module for the Fake News Detector application.

This module implements a LangGraph-based agent that orchestrates
the fact-checking process. It defines the agent setup, tools,
and processing pipeline for claim verification.
"""

import os
import time
import ast
import json
import logging
import traceback

from langchain_core.tools import tool
from langchain.prompts import PromptTemplate
from langgraph.prebuilt import create_react_agent

from utils.models import get_llm_model
from utils.performance import PerformanceTracker
from modules.claim_extraction import extract_claims
from modules.evidence_retrieval import retrieve_combined_evidence
from modules.classification import classify_with_llm, aggregate_evidence
from modules.explanation import generate_explanation

# Configure logger
logger = logging.getLogger("misinformation_detector")

# Reference to global performance tracker
performance_tracker = PerformanceTracker()

# Define LangGraph Tools
@tool
def claim_extractor(query):
    """
    Tool that extracts factual claims from a given text.

    Args:
        query (str): Text containing potential factual claims

    Returns:
        str: Extracted factual claim
    """
    performance_tracker.log_claim_processed()
    return extract_claims(query)

@tool
def evidence_retriever(query):
    """
    Tool that retrieves evidence from multiple sources for a claim.

    Args:
        query (str): The factual claim to gather evidence for

    Returns:
        list: List of evidence items from various sources
    """
    return retrieve_combined_evidence(query)

@tool
def truth_classifier(query, evidence):
    """
    Tool that classifies the truthfulness of a claim based on evidence.

    Args:
        query (str): The factual claim to classify
        evidence (list): Evidence items to evaluate

    Returns:
        str: JSON string containing verdict, confidence, and results
    """
    classification_results = classify_with_llm(query, evidence)
    truth_label, confidence = aggregate_evidence(classification_results)

    # Debug logging
    logger.info(f"Classification results: {len(classification_results)} items")
    logger.info(f"Aggregate result: {truth_label}, confidence: {confidence}")

    # Ensure confidence is at least 0.6 for any definitive verdict
    if "True" in truth_label or "False" in truth_label:
        confidence = max(confidence, 0.6)

    # Return a dictionary with all needed information
    result = {
        "verdict": truth_label,
        "confidence": confidence,
        "results": classification_results
    }

    # Convert to a JSON string for consistent handling downstream
    return json.dumps(result)

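# For reference, the JSON string returned above has the shape that
# format_response() parses later in this module; the exact contents of
# "results" depend on classify_with_llm and are left unspecified here:
#
#   {"verdict": "True", "confidence": 0.85, "results": [...]}
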
@tool
def explanation_generator(claim, evidence_results, truth_label):
    """
    Tool that generates a human-readable explanation for the verdict.

    Args:
        claim (str): The factual claim being verified
        evidence_results (list): Evidence items and classification results
        truth_label (str): The verdict (True/False/Uncertain)

    Returns:
        str: Natural language explanation of the verdict
    """
    explanation = generate_explanation(claim, evidence_results, truth_label)
    logger.info(f"Generated explanation: {explanation[:100]}...")
    return explanation

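# A minimal sketch of exercising the tools directly (outside the agent loop),
# e.g. for debugging. @tool-decorated functions expose .invoke(); single-argument
# tools accept a plain string, multi-argument tools take a dict. The claim text
# below is illustrative only:
#
#   claim = claim_extractor.invoke("The Eiffel Tower is located in Paris.")
#   evidence = evidence_retriever.invoke(claim)
#   verdict_json = truth_classifier.invoke({"query": claim, "evidence": evidence})
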
def setup_agent():
    """
    Create and configure a ReAct agent with the fact-checking tools.

    This function configures a LangGraph ReAct agent with all the
    necessary tools for fact checking, including claim extraction,
    evidence retrieval, classification, and explanation generation.

    Returns:
        object: Configured LangGraph agent ready for claim processing

    Raises:
        ValueError: If the OpenAI API key is not set
    """
    # Make sure the OpenAI API key is set and non-empty
    if "OPENAI_API_KEY" not in os.environ or not os.environ["OPENAI_API_KEY"].strip():
        logger.error("OPENAI_API_KEY environment variable not set or empty.")
        raise ValueError("OpenAI API key is required")

    # Register the fact-checking tools
    tools = [
        claim_extractor,
        evidence_retriever,
        truth_classifier,
        explanation_generator
    ]

    # Define the prompt template with clear, efficient instructions
    FORMAT_INSTRUCTIONS_TEMPLATE = """
    Use the following format:
    Question: the input question you must answer
    Action: the action to take, should be one of: {tool_names}
    Action Input: the input to the action
    Observation: the result of the action
    ... (this Action/Action Input/Observation can repeat N times)
    Final Answer: the final answer to the original input question
    """

    prompt = PromptTemplate(
        input_variables=["input", "tool_names"],
        template=f"""
        You are a fact-checking assistant that verifies claims by gathering evidence and
        determining their truthfulness. Follow these exact steps in sequence:

        1. Call claim_extractor to extract the main factual claim
        2. Call evidence_retriever to gather evidence about the claim
        3. Call truth_classifier to evaluate the claim using the evidence
        4. Call explanation_generator to explain the result
        5. Provide your Final Answer that summarizes everything

        Execute these steps in order without unnecessary thinking steps between tool calls.
        Be direct and efficient in your verification process.

        {FORMAT_INSTRUCTIONS_TEMPLATE}
        """
    )

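    # For illustration, one pass under this format might read as follows
    # (the claim and tool outputs are invented for the example):
    #
    #   Question: Is the claim "The Eiffel Tower is in Berlin" true?
    #   Action: claim_extractor
    #   Action Input: The Eiffel Tower is in Berlin
    #   Observation: The Eiffel Tower is in Berlin
    #   ... (evidence_retriever, truth_classifier, explanation_generator)
    #   Final Answer: False - the Eiffel Tower is in Paris, not Berlin.
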
    try:
        # Get the LLM model
        model = get_llm_model()

        # Create the ReAct agent with the fact-checking tools
        # (note: the prompt defined above is not passed to create_react_agent here)
        graph = create_react_agent(model, tools=tools)
        logger.info("Agent created successfully")
        return graph
    except Exception as e:
        logger.error(f"Error creating agent: {str(e)}")
        raise

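# A minimal usage sketch, assuming a valid OpenAI API key (the value below is
# a placeholder, not a real credential):
#
#   os.environ.setdefault("OPENAI_API_KEY", "sk-...")
#   agent = setup_agent()
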
def process_claim(claim, agent=None, recursion_limit=20):
    """
    Process a claim to determine its truthfulness using the agent.

    This function invokes the LangGraph agent to process a factual claim,
    extract supporting evidence, evaluate the claim's truthfulness, and
    generate a human-readable explanation.

    Args:
        claim (str): The factual claim to be verified
        agent (object, optional): Initialized LangGraph agent. If None, an error
            is logged and None is returned.
        recursion_limit (int, optional): Maximum recursion depth for the agent. Default: 20.
            Higher values allow more complex reasoning but increase processing time.

    Returns:
        dict: Result dictionary containing:
            - claim: Extracted factual claim
            - evidence: List of evidence pieces
            - evidence_count: Number of evidence pieces
            - classification: Verdict (True/False/Uncertain)
            - confidence: Confidence score (0-1)
            - explanation: Human-readable explanation of the verdict
            - final_answer: Final answer from the agent
            Or error information if processing failed
    """
    if agent is None:
        logger.error("Agent not initialized. Call setup_agent() first.")
        return None

    start_time = time.time()
    logger.info(f"Processing claim with agent: {claim}")

    try:
        # Format inputs for the agent
        inputs = {"messages": [("user", claim)]}

        # Set configuration; a reduced recursion limit speeds up processing
        config = {"recursion_limit": recursion_limit}

        # Invoke the agent
        response = agent.invoke(inputs, config)

        # Format the response
        result = format_response(response)

        # Log performance
        elapsed = time.time() - start_time
        logger.info(f"Claim processed in {elapsed:.2f} seconds")

        return result

    except Exception as e:
        logger.error(f"Error processing claim with agent: {str(e)}")
        logger.error(traceback.format_exc())
        return {"error": str(e)}

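# A minimal end-to-end sketch, assuming setup_agent() succeeded; the claim is
# illustrative and the keys follow the result dictionary documented above:
#
#   agent = setup_agent()
#   result = process_claim("The Great Wall of China is visible from space.", agent=agent)
#   if result and "error" not in result:
#       print(result["classification"], result["confidence"])
#       print(result["explanation"])
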
def format_response(response):
    """
    Format the agent's response into a structured result.

    This function extracts key information from the agent's response,
    including the claim, evidence, classification, and explanation.
    It also performs error handling and provides fallback values.

    Args:
        response (dict): Raw response from the LangGraph agent

    Returns:
        dict: Structured result containing claim verification data
    """
    try:
        if not response or "messages" not in response:
            return {"error": "Invalid response format"}

        messages = response.get("messages", [])

        # Initialize the result container with default values
        result = {
            "claim": None,
            "evidence": [],
            "evidence_count": 0,
            "classification": "Uncertain",
            "confidence": 0.2,  # Default low confidence
            "explanation": "Insufficient evidence to evaluate this claim.",
            "final_answer": None,
            "thoughts": []
        }

        # Track whether we found output from each tool
        found_tools = {
            "claim_extractor": False,
            "evidence_retriever": False,
            "truth_classifier": False,
            "explanation_generator": False
        }

        # Extract information from messages
        tool_outputs = {}

        for idx, message in enumerate(messages):
            # Extract agent thoughts (LangChain assistant messages have type "ai")
            if hasattr(message, "content") and getattr(message, "type", "") in ("ai", "assistant"):
                content = message.content
                if "Thought:" in content:
                    thought_parts = content.split("Thought:", 1)
                    if len(thought_parts) > 1:
                        thought = thought_parts[1].split("\n")[0].strip()
                        result["thoughts"].append(thought)

            # Extract tool outputs
            if hasattr(message, "type") and message.type == "tool":
                tool_name = getattr(message, "name", "unknown")

                # Store tool outputs
                tool_outputs[tool_name] = message.content

                # Extract specific information
                if tool_name == "claim_extractor":
                    found_tools["claim_extractor"] = True
                    if message.content:
                        result["claim"] = message.content

                elif tool_name == "evidence_retriever":
                    found_tools["evidence_retriever"] = True
                    # Handle a string representation of a list
                    if message.content:
                        if isinstance(message.content, list):
                            result["evidence"] = message.content
                            result["evidence_count"] = len(message.content)
                        elif isinstance(message.content, str) and message.content.startswith("[") and message.content.endswith("]"):
                            try:
                                parsed_content = ast.literal_eval(message.content)
                                if isinstance(parsed_content, list):
                                    result["evidence"] = parsed_content
                                    result["evidence_count"] = len(parsed_content)
                                else:
                                    result["evidence"] = [message.content]
                                    result["evidence_count"] = 1
                            except (ValueError, SyntaxError):
                                result["evidence"] = [message.content]
                                result["evidence_count"] = 1
                        else:
                            result["evidence"] = [message.content]
                            result["evidence_count"] = 1
                            logger.warning(f"Evidence retrieved is not a list: {type(message.content)}")

                elif tool_name == "truth_classifier":
                    found_tools["truth_classifier"] = True

                    # Log the incoming content for debugging
                    logger.info(f"Truth classifier content type: {type(message.content)}")
                    logger.info(f"Truth classifier content: {message.content}")

                    # Handle the JSON-formatted result from truth_classifier
                    if isinstance(message.content, str):
                        try:
                            # Parse the JSON string
                            parsed_content = json.loads(message.content)

                            # Extract the values from the parsed content
                            result["classification"] = parsed_content.get("verdict", "Uncertain")
                            result["confidence"] = float(parsed_content.get("confidence", 0.2))
                            result["classification_results"] = parsed_content.get("results", [])

                            logger.info(f"Extracted from JSON: verdict={result['classification']}, confidence={result['confidence']}")
                        except json.JSONDecodeError:
                            logger.warning(f"Could not parse truth classifier JSON: {message.content}")
                        except Exception as e:
                            logger.warning(f"Error extracting from truth classifier output: {e}")
                    else:
                        logger.warning(f"Unexpected truth_classifier content format: {message.content}")

                elif tool_name == "explanation_generator":
                    found_tools["explanation_generator"] = True
                    if message.content:
                        result["explanation"] = message.content
                        logger.info(f"Found explanation from tool: {message.content[:100]}...")

            # Get the final answer from the last message
            elif idx == len(messages) - 1 and hasattr(message, "content"):
                result["final_answer"] = message.content

        # Log which tools weren't found
        missing_tools = [tool_name for tool_name, found in found_tools.items() if not found]
        if missing_tools:
            logger.warning(f"Missing tool outputs in response: {', '.join(missing_tools)}")

        # FALLBACK: if we have a truth classification but the explanation is missing, generate it now
        if found_tools["truth_classifier"] and not found_tools["explanation_generator"]:
            logger.info("Explanation generator was not called by the agent, using fallback explanation generation")

            try:
                # Gather the inputs needed for explanation generation
                claim = result["claim"]
                evidence = result["evidence"]
                truth_label = result["classification"]
                confidence_value = result["confidence"]  # Pass the confidence value
                classification_results = result.get("classification_results", [])

                # Choose the best available evidence for the explanation
                explanation_evidence = classification_results if classification_results else evidence

                # Generate the explanation, passing the confidence value
                explanation = generate_explanation(claim, explanation_evidence, truth_label, confidence_value)

                # Use the generated explanation
                if explanation:
                    logger.info(f"Generated fallback explanation: {explanation[:100]}...")
                    result["explanation"] = explanation
            except Exception as e:
                logger.error(f"Error generating fallback explanation: {e}")

        # Guard against an inconsistent evidence count
        if result["evidence_count"] > 0 and not result["evidence"]:
            logger.warning("Evidence count is non-zero but the evidence list is empty. This is a data inconsistency.")
            result["evidence_count"] = 0

        # Add debug info about the final result
        logger.info(f"Final classification: {result['classification']}, confidence: {result['confidence']}")
        logger.info(f"Final explanation: {result['explanation'][:100]}...")

        # Add performance metrics
        result["performance"] = performance_tracker.get_summary()

        # Memory management: limit the size of evidence and thoughts
        # to keep memory usage reasonable for web deployment
        if "evidence" in result and isinstance(result["evidence"], list):
            limited_evidence = []
            for ev in result["evidence"]:
                if isinstance(ev, str) and len(ev) > 500:
                    limited_evidence.append(ev[:497] + "...")
                else:
                    limited_evidence.append(ev)
            result["evidence"] = limited_evidence

        # Limit thoughts to conserve memory
        if "thoughts" in result and len(result["thoughts"]) > 10:
            result["thoughts"] = result["thoughts"][:10]

        return result

    except Exception as e:
        logger.error(f"Error formatting agent response: {str(e)}")
        logger.error(traceback.format_exc())
        return {
            "error": str(e),
            "traceback": traceback.format_exc(),
            "classification": "Error",
            "confidence": 0.1,
            "explanation": "An error occurred while processing this claim."
        }
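
# A minimal sketch for exercising format_response() in isolation with a
# hand-built response. It assumes the agent emits LangChain ToolMessage
# objects (message.type == "tool"); the claim and verdict values are invented.
if __name__ == "__main__":
    from langchain_core.messages import ToolMessage

    fake_response = {
        "messages": [
            ToolMessage(content="The Eiffel Tower is in Paris.",
                        name="claim_extractor", tool_call_id="call-1"),
            ToolMessage(content='{"verdict": "True", "confidence": 0.9, "results": []}',
                        name="truth_classifier", tool_call_id="call-2"),
            ToolMessage(content="Multiple sources confirm the tower is in Paris.",
                        name="explanation_generator", tool_call_id="call-3"),
        ]
    }
    parsed = format_response(fake_response)
    print(parsed["classification"], parsed["confidence"])  # expected: True 0.9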