"""Legal assistant agent for Alaska case law, built on vectara_agentic.

The module defines:

* ``extract_components_from_citation`` - parses a citation such as
  ``253 P.2d 136`` into volume / reporter / first page.
* ``AgentTools`` - the tools offered to the agent: two Vectara tools
  (``ask_caselaw``, ``search_caselaw``) plus helpers that look a case up on
  case.law by its citation.
* ``get_agent_config`` / ``initialize_agent`` - configuration and agent factory.
"""

import json
import os
import re
from typing import Tuple, List, Optional

import requests
from omegaconf import OmegaConf
from pydantic import Field, BaseModel
from dotenv import load_dotenv

from vectara_agentic.agent import Agent
from vectara_agentic.agent_config import AgentConfig
from vectara_agentic.tools import ToolsFactory, VectaraToolFactory
from vectara_agentic.tools_catalog import ToolsCatalog
from vectara_agentic.types import ModelProvider, AgentType

load_dotenv(override=True)

citation_description = '''
    The citation for a particular case.
    Citation must include the volume number, reporter, and first page. For example: 253 P.2d 136.
'''

# Seconds to wait for case.law before giving up, so that a stalled connection
# cannot block a tool call indefinitely.
REQUEST_TIMEOUT_SECONDS = 30

# Upper bound on the number of cited cases resolved by get_cited_cases; each
# resolved case costs one additional HTTP request.
MAX_CITED_CASES = 10

# URL shapes produced by get_case_document_pdf and get_case_document_page.
# The dots and the '?' are escaped: unescaped, '/?' means "optional slash" and
# the pattern would reject the very URLs get_case_document_page returns.
_PDF_URL_PATTERN = re.compile(r'^https://static\.case\.law/')
_CASE_PAGE_URL_PATTERN = re.compile(r'^https://case\.law/caselaw/\?reporter=')


def extract_components_from_citation(citation: str) -> dict:
    """Split a case citation into its volume, reporter and first page.

    The first whitespace-separated token is the volume, the last one is the
    first page, and everything in between is the reporter. The reporter is
    normalised by joining its tokens with '-', dropping dots and lower-casing
    (``"P.2d"`` -> ``"p2d"``, ``"F. Supp."`` -> ``"f-supp"``).

    Args:
        citation: the citation text, e.g. ``"253 P.2d 136"``.

    Returns:
        ``{'volume': int, 'reporter': str, 'first_page': int}``, or an empty
        dict when the citation does not have a numeric volume, a non-empty
        reporter and a numeric first page.
    """
    # split() (no argument) tolerates leading/trailing/repeated whitespace.
    components = citation.split()
    if len(components) < 3:
        return {}

    volume_num, *reporter_parts, first_page = components
    # isdecimal() only accepts characters that int() can parse.
    if not (volume_num.isdecimal() and first_page.isdecimal()):
        return {}

    reporter = '-'.join(reporter_parts).replace('.', '').lower()
    if not reporter:
        return {}

    return {'volume': int(volume_num), 'reporter': reporter, 'first_page': int(first_page)}


def _fetch_case_record(citation_dict: dict) -> Optional[dict]:
    """Download and decode the JSON record of a case from static.case.law.

    Args:
        citation_dict: a non-empty result of extract_components_from_citation.

    Returns:
        The decoded JSON body, or None when the request fails or times out,
        the response status is not 200, or the body is not valid JSON.
    """
    url = (
        f"https://static.case.law/{citation_dict['reporter']}/{citation_dict['volume']}"
        f"/cases/{citation_dict['first_page']:04d}-01.json"
    )
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    except requests.RequestException:
        return None
    if response.status_code != 200:
        return None
    try:
        return json.loads(response.text)
    except ValueError:
        return None


class AgentTools:
    """Builds the set of tools made available to the legal assistant agent."""

    def __init__(self, _cfg, agent_config):
        """Store the configuration and create the tool factories.

        Args:
            _cfg: configuration object exposing ``api_key`` and ``corpus_key``.
            agent_config: the AgentConfig passed to ToolsCatalog when
                summarising opinion text.
        """
        self.tools_factory = ToolsFactory()
        self.agent_config = agent_config
        self.cfg = _cfg
        self.vec_factory = VectaraToolFactory(
            vectara_api_key=_cfg.api_key,
            vectara_corpus_key=_cfg.corpus_key,
        )

    def get_opinion_text(
        self,
        case_citation: str = Field(description = citation_description),
        summarize: bool = Field(default=True, description="if True returns case summary, otherwise the full text of the case")
    ) -> str:
        """
        Returns the full opinion/ruling text of the case, or the summary if summarize=True.
        If there is more than one opinion for the case, the type of each opinion is returned with the text,
        and the opinions (or their summaries) are separated by semicolons (;)

        Args:
            case_citation (str): the citation for a particular case.
                Citation must include the volume number, reporter, and first page. For example: 253 P.2d 136.
            summarize (bool): True to return just a summary of the case, False to return full case text.

        Returns:
            str: the full opinion/ruling text of the case, or the summary if summarize is True.
        """
        citation_dict = extract_components_from_citation(case_citation)
        if not citation_dict:
            return f"Citation is invalid: {case_citation}."

        case_record = _fetch_case_record(citation_dict)
        if case_record is None:
            return f"Case not found; please check the citation {case_citation}."

        summarize_text = ToolsCatalog(self.agent_config).summarize_text

        def render(text: str) -> str:
            # Either the raw opinion text or its summary, depending on the flag.
            return summarize_text(text, "law") if summarize else text

        opinions = case_record["casebody"]["opinions"]
        if len(opinions) == 1:
            return render(opinions[0]["text"])
        return "".join(
            f"Opinion type: {opinion['type']}, text: {render(opinion['text'])};"
            for opinion in opinions
        )

    def get_case_document_pdf(
        self,
        case_citation: str = Field(description = citation_description)
    ) -> str:
        """
        Given a case citation, returns a valid web URL to a pdf of the case record

        Args:
            case_citation (str): the citation for a particular case.
                Citation must include the volume number, reporter, and first page. For example: 253 P.2d 136.

        Returns:
            str: a valid web URL to a pdf of the case record
        """
        citation_dict = extract_components_from_citation(case_citation)
        if not citation_dict:
            return f"Citation is invalid: {case_citation}."

        case_record = _fetch_case_record(citation_dict)
        if case_record is None:
            return f"Case not found; please check the citation {case_citation}."

        # The PDF holds the whole volume; the fragment jumps to the case's first page.
        page_number = case_record["first_page_order"]
        return f"https://static.case.law/{citation_dict['reporter']}/{citation_dict['volume']}.pdf#page={page_number}"

    def get_case_document_page(
        self,
        case_citation: str = Field(description = citation_description)
    ) -> str:
        """
        Given a case citation, returns a valid web URL to a page with information about the case.

        Args:
            case_citation (str): the citation for a particular case.
                Citation must include the volume number, reporter, and first page. For example: 253 P.2d 136.

        Returns:
            str: a valid web URL to a page with information about the case
        """
        citation_dict = extract_components_from_citation(case_citation)
        if not citation_dict:
            return f"Citation is invalid: {case_citation}."

        reporter = citation_dict['reporter']
        volume_num = citation_dict['volume']
        first_page = citation_dict['first_page']
        url = f"https://case.law/caselaw/?reporter={reporter}&volume={volume_num}&case={first_page:04d}-01"

        # Only hand the URL back if the page actually answers with 200.
        try:
            response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        except requests.RequestException:
            return "Case not found; please check the citation."
        if response.status_code != 200:
            return "Case not found; please check the citation."
        return url

    def get_case_name(
        self,
        case_citation: str = Field(description = citation_description)
    ) -> Tuple[str, str]:
        """
        Given a case citation, returns its name and name abbreviation.

        Args:
            case_citation (str): the citation for a particular case.
                Citation must include the volume number, reporter, and first page. For example: 253 P.2d 136.

        Returns:
            Tuple[str, str]: the name and name abbreviation of the case
        """
        citation_dict = extract_components_from_citation(case_citation)
        if not citation_dict:
            return f"Citation is invalid: {case_citation}.", f"Citation is invalid: {case_citation}."

        case_record = _fetch_case_record(citation_dict)
        if case_record is None:
            return "Case not found", "Case not found"
        return case_record["name"], case_record["name_abbreviation"]

    def get_cited_cases(
        self,
        case_citation: str = Field(description = citation_description)
    ) -> List[dict]:
        """
        Given a case citation, returns a list of cases that are cited by the opinion of this case.

        Args:
            case_citation (str): the citation for a particular case.
                Citation must include the volume number, reporter, and first page. For example: 253 P.2d 136.

        Returns:
            A list of cases, each a dict with the citation, name and name_abbreviation of the case.
            If the citation is invalid or the case is not found, a one-element list holding an error message.
        """
        citation_dict = extract_components_from_citation(case_citation)
        if not citation_dict:
            return [f"Citation is invalid: {case_citation}."]

        case_record = _fetch_case_record(citation_dict)
        if case_record is None:
            # Returned as a list, like the invalid-citation case above, so the
            # result is always a list.
            return ["Case not found; please check the citation."]

        cited_cases = []
        # Capped because every entry triggers another lookup via get_case_name.
        for citation in case_record["cites_to"][:MAX_CITED_CASES]:
            name, name_abbreviation = self.get_case_name(citation["cite"])
            cited_cases.append({
                "citation": citation["cite"],
                "name": name,
                "name_abbreviation": name_abbreviation
            })
        return cited_cases

    def validate_url(
        self,
        url: str = Field(description = "A web url pointing to case-law document")
    ) -> str:
        """
        Given a url, returns whether or not the url is valid.

        Args:
            url (str): A web url pointing to case-law document

        Returns:
            str: "URL is valid" if the url is valid, "URL is invalid" otherwise.
        """
        # Valid means: shaped like a URL that get_case_document_pdf or
        # get_case_document_page would return. No network request is made.
        if _PDF_URL_PATTERN.match(url) or _CASE_PAGE_URL_PATTERN.match(url):
            return "URL is valid"
        return "URL is invalid"

    def get_tools(self):
        """Create and return every tool of the agent.

        Returns:
            A list with the 'ask_caselaw' RAG tool and the 'search_caselaw'
            search tool, followed by tools wrapping the citation helper
            methods of this class.
        """
        class QueryCaselawArgs(BaseModel):
            citations: Optional[str] = Field(description = citation_description, default=None)

        summarizer = 'vectara-summary-table-md-query-ext-jan-2025-gpt-4o'
        ask_caselaw = self.vec_factory.create_rag_tool(
            tool_name = "ask_caselaw",
            tool_description = "A tool for asking questions about case law, and any legal issue in the state of Alaska.",
            tool_args_schema = QueryCaselawArgs,
            reranker = "chain", rerank_k = 100,
            rerank_chain = [
                {
                    "type": "slingshot",
                    "cutoff": 0.2
                },
                {
                    "type": "mmr",
                    "diversity_bias": 0.1
                },
                {
                    # Subtracts the age of the decision, in years, from the scaled
                    # relevance score and clamps the result at 0.
                    "type": "userfn",
                    "user_function": "max(1000 * get('$.score') - hours(seconds(to_unix_timestamp(now()) - to_unix_timestamp(datetime_parse(get('$.document_metadata.decision_date'), 'yyyy-MM-dd')))) / 24 / 365, 0)"
                }
            ],
            n_sentences_before = 2, n_sentences_after = 2, lambda_val = 0.005,
            summary_num_results = 15,
            vectara_summarizer = summarizer,
            max_tokens = 4096,
            max_response_chars = 8192,
            include_citations = True,
            save_history = True,
        )
        search_caselaw = self.vec_factory.create_search_tool(
            tool_name = "search_caselaw",
            tool_description = "A tool for retrieving a list of relevant documents about case law in Alaska.",
            tool_args_schema = QueryCaselawArgs,
            reranker = "chain", rerank_k = 100,
            rerank_chain = [
                {
                    "type": "slingshot",
                    "cutoff": 0.2
                },
                {
                    "type": "mmr",
                    "diversity_bias": 0.1
                },
            ],
            n_sentences_before = 2, n_sentences_after = 2, lambda_val = 0.005,
        )

        return (
            [ask_caselaw, search_caselaw] +
            [self.tools_factory.create_tool(tool) for tool in [
                self.get_opinion_text,
                self.get_case_document_pdf,
                self.get_case_document_page,
                self.get_cited_cases,
                self.get_case_name,
                self.validate_url
            ]]
        )


def get_agent_config() -> OmegaConf:
    """Build the demo configuration from environment variables.

    VECTARA_CORPUS_KEY and VECTARA_API_KEY are required (a missing one raises
    KeyError); QUERY_EXAMPLES is optional and defaults to None.

    Returns:
        The configuration created by OmegaConf.create.
    """
    cfg = OmegaConf.create({
        'corpus_key': str(os.environ['VECTARA_CORPUS_KEY']),
        'api_key': str(os.environ['VECTARA_API_KEY']),
        'examples': os.environ.get('QUERY_EXAMPLES', None),
        'demo_name': "legal-agent",
        'demo_welcome': "Welcome to the Legal Assistant demo.",
        'demo_description': "This demo can help you prepare for a court case by providing you information about past court cases in Alaska.",
    })
    return cfg


def initialize_agent(_cfg, agent_progress_callback=None):
    """Create the legal assistant agent.

    Args:
        _cfg: configuration exposing ``api_key`` and ``corpus_key`` (see
            get_agent_config).
        agent_progress_callback: optional callback handed to Agent as
            ``agent_progress_callback``.

    Returns:
        The configured Agent.
    """
    # NOTE(review): the instructions below refer to a 'critique_as_judge' tool,
    # but AgentTools.get_tools() in this file does not register a tool with
    # that name - confirm whether it should be added or the instruction dropped.
    legal_assistant_instructions = """
    - You are a helpful legal assistant, with case law expertise in the state of Alaska.
    - Always use the 'ask_caselaw' tool first, as your primary tool for answering questions. Never use your own knowledge.
    - The references returned by the 'ask_caselaw' tool include metadata relevant to its response, such as case citations, dates, or names.
    - Use the 'search_caselaw' tool to search for documents related to case law in Alaska, and set summarize=True to get a summary of those documents.
    - When using a case citation in your response, try to include a valid URL along with it:
      * Call the 'get_case_document_pdf' for a case citation to obtain a valid web URL to a pdf of the case record.
      * If this doesn't work, call the 'get_case_document_page' for a case citation to obtain a valid web URL to a page with information about the case.
    - When including a URL for a citation in your response, use the citation as anchor text, and the URL as the link.
    - Never use your internal knowledge to guess a case citation. Only use citation information provided by a tool or by the user.
    - A Case Citation includes 3 components: volume number, reporter, and first page.
      Here are some examples: '253 P.2d 136', '10 Alaska 11', '6 C.M.A. 3'
    - If two cases have conflicting rulings, assume that the case with the more current ruling date is correct.
    - If the response is based on cases that are older than 5 years, make sure to inform the user that the information may be outdated,
      since some case opinions may no longer apply in law.
    - If a user wants to test their argument, use the 'ask_caselaw' tool to gather information about cases related to their argument
      and the 'critique_as_judge' tool to determine whether their argument is sound or has issues that must be corrected.
    - Never discuss politics, and always respond politely.
    """

    # Primary configuration; each setting can be overridden through the environment.
    agent_config = AgentConfig(
        agent_type = os.getenv("VECTARA_AGENTIC_AGENT_TYPE", AgentType.OPENAI.value),
        main_llm_provider = os.getenv("VECTARA_AGENTIC_MAIN_LLM_PROVIDER", ModelProvider.OPENAI.value),
        main_llm_model_name = os.getenv("VECTARA_AGENTIC_MAIN_MODEL_NAME", ""),
        tool_llm_provider = os.getenv("VECTARA_AGENTIC_TOOL_LLM_PROVIDER", ModelProvider.OPENAI.value),
        tool_llm_model_name = os.getenv("VECTARA_AGENTIC_TOOL_MODEL_NAME", ""),
        observer = os.getenv("VECTARA_AGENTIC_OBSERVER_TYPE", "NO_OBSERVER")
    )
    # Passed to Agent as fallback_agent_config; reads the *_FALLBACK_* variables
    # but shares the observer setting with the primary configuration.
    fallback_agent_config = AgentConfig(
        agent_type = os.getenv("VECTARA_AGENTIC_FALLBACK_AGENT_TYPE", AgentType.OPENAI.value),
        main_llm_provider = os.getenv("VECTARA_AGENTIC_FALLBACK_MAIN_LLM_PROVIDER", ModelProvider.OPENAI.value),
        main_llm_model_name = os.getenv("VECTARA_AGENTIC_FALLBACK_MAIN_MODEL_NAME", ""),
        tool_llm_provider = os.getenv("VECTARA_AGENTIC_FALLBACK_TOOL_LLM_PROVIDER", ModelProvider.OPENAI.value),
        tool_llm_model_name = os.getenv("VECTARA_AGENTIC_FALLBACK_TOOL_MODEL_NAME", ""),
        observer = os.getenv("VECTARA_AGENTIC_OBSERVER_TYPE", "NO_OBSERVER")
    )

    agent = Agent(
        tools=AgentTools(_cfg, agent_config).get_tools(),
        topic="Case law in Alaska",
        custom_instructions=legal_assistant_instructions,
        agent_progress_callback=agent_progress_callback,
        agent_config=agent_config,
        fallback_agent_config=fallback_agent_config,
    )
    agent.report(detailed=False)
    return agent