"""Measure how often an LLM's question/answer output can be parsed.

The script sends a user description and a verification prompt to a model through
``OllamaRepository``, asks the model to clean up its own JSON, and then tries
three parsing strategies in turn:

1. ``json.loads`` on the cleaned-up reply,
2. ``json.loads`` on a second reply produced after showing the model the
   decoder's error message,
3. a regex that pulls ``question``/``answer`` pairs out of that second reply.

It keeps looping and printing running totals until all three strategies fail.
"""
import json
import re
from json import JSONDecodeError

from repository.ollama import OllamaRepository
from schema import ModelRoles

# Number of question/answer pairs the regex fallback must recover for a reply
# to count as fully parsed.
EXPECTED_ANSWER_COUNT = 10

# this regex is most definitely *not* going to make my system blow up unexpectedly at some point in the future
# Group 1 is the question text; group 2 is the raw answer token, i.e. either
# ``null`` (possibly padded with whitespace) or the answer still wrapped in its
# double quotes.
get_questions_and_answers_regex = re.compile(r'{\s*"question":\s*"([^"]+)",\s*"answer":\s*(\s*null\s*|"[^"]+"\s*)}')


def print_current_status(total_success, first_try_successes, second_try_successes, regex_successes):
    """Print the running totals of successful parses, broken down by strategy.

    Args:
        total_success: Number of iterations that were parsed by any strategy.
        first_try_successes: Parses that succeeded on the first ``json.loads``.
        second_try_successes: Parses that succeeded after the model was asked
            to correct its JSON using the decoder's error message.
        regex_successes: Parses recovered by the regex fallback.
    """
    print(
        f"so far, questions and answers were parsed successfully {total_success} times, "
        f"of which {first_try_successes} were successful at the first attempt, "
        f"{second_try_successes} were successful after asking the model to self-correct "
        f"with a 'JSON expert' agent and {regex_successes} were successful using a regex "
    )


def _read_text(path):
    """Return the full contents of the text file at *path*, decoded as UTF-8."""
    with open(path, encoding="utf-8") as handle:
        return handle.read()


def _extract_answers_with_regex(text):
    """Return a ``{question: raw_answer}`` dict for every regex match in *text*.

    When the same question matches more than once, the last match wins.
    """
    return dict(get_questions_and_answers_regex.findall(text))


def main():
    """Run the parse-reliability loop until a reply cannot be parsed at all."""
    questions = _read_text("questions.txt")
    system_prompt = _read_text("system_prompt.txt")
    verification_prompt = _read_text("verification_prompt.txt").replace("{questions}", questions)

    user_prompt = input(
        "Please describe what you need to do. To get the best results "
        f"try to answer the following questions:\n{questions}\n\n>"
    )

    ollama_repository = OllamaRepository("llama3.1", system_prompt, ModelRoles("system", "user", "assistant"))

    successful = 0
    corrected_json_was_ok = 0
    regex_got_all_answers = 0
    successful_at_first_attempt = 0

    while True:
        ollama_repository.send_prompt(f"Ingest the following information: {user_prompt}")
        answer = ollama_repository.send_prompt(verification_prompt)

        # NOTE(review): system_msg is never set back to system_prompt, so from
        # the second iteration on the "Ingest ..." prompt is presumably sent
        # under the JSON-expert system message -- confirm this is intended.
        ollama_repository.system_msg = "You are an expert at JSON format, and can detect and fix errors in JSON strings"
        fixed_json = ollama_repository.send_prompt(
            "Verify if this is legal JSON and correct any mistake, "
            f"output just the fixed json without adding anything else {answer['content']}"
        )
        answers = fixed_json["content"]

        # Strategy 1: the cleaned-up reply is already valid JSON.
        try:
            json.loads(answers, strict=False)
        except JSONDecodeError as error:
            # The exception name is unbound once the except block ends, so keep
            # only the message needed for the follow-up prompt.
            error_message = str(error)
        else:
            print(answers)
            successful_at_first_attempt += 1
            successful += 1
            print_current_status(successful, successful_at_first_attempt, corrected_json_was_ok, regex_got_all_answers)
            continue

        # Strategy 2: show the model the decoder error and parse its correction.
        print("there was a problem in this json, asking the LLM to fix it passing the exception error message")
        corrected = ollama_repository.send_prompt(
            f"When parsing this json {answers} I got this error {error_message}. "
            "Fix this error and return just the corrected json without adding anything else"
        )["content"]
        print("trying to parse the corrected json")
        try:
            # strict=False to match the first attempt; otherwise a control
            # character accepted there would be rejected here.
            json.loads(corrected, strict=False)
        except JSONDecodeError:
            print("still error, going old school")
        else:
            corrected_json_was_ok += 1
            # Count this iteration in the grand total as well, so the total
            # equals the sum of the three per-strategy counters.
            successful += 1
            print_current_status(successful, successful_at_first_attempt, corrected_json_was_ok, regex_got_all_answers)
            continue

        # Strategy 3: scrape question/answer pairs out of the corrected reply.
        regex_parse_result = _extract_answers_with_regex(corrected)
        if len(regex_parse_result) != EXPECTED_ANSWER_COUNT:
            print(f"unable to parse \n {answers}\n giving up")
            break

        print(f"I got all {EXPECTED_ANSWER_COUNT} answers apparently")
        successful += 1
        regex_got_all_answers += 1
        print_current_status(successful, successful_at_first_attempt, corrected_json_was_ok, regex_got_all_answers)


if __name__ == '__main__':
    main()