import os import chromadb from chromadb.utils import embedding_functions import json import re from openai import OpenAI import re import json def robust_json_extractor(response_content): # Preprocess: Remove markdown code blocks and extra whitespace cleaned = re.sub(r'``````', '', response_content).strip() # Key-specific regex patterns patterns = { "verdict": r'"verdict"\s*:\s*"((?:\\"|[^"])*)"', "evidence": r'"evidence"\s*:\s*(\[[^\]]*?\]|\[.*?\])(?=\s*[,}])', "reasoning": r'"reasoning"\s*:\s*"((?:\\"|[^"])*)"' } result = {} for key, pattern in patterns.items(): match = re.search(pattern, cleaned, re.DOTALL) if match: try: if key == "evidence": # Handle array parsing with json.loads evidence_str = re.sub(r'(?