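# NOTE(review): the original first lines here read "Spaces: / Running / Running".
# They look like residue from the web page this file was copied from, not program
# text, and are kept only as this comment so the module can be parsed.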
import json
import re
from json import JSONDecodeError

from repository.ollama import OllamaRepository
from schema import ModelRoles
# Last-resort extractor used when the model's reply cannot be parsed as JSON:
# finds every {"question": "...", "answer": ...} object inside a string.
#   group 1 - the question text, without its surrounding double quotes
#   group 2 - the answer exactly as written: `null`, or the string literal
#             *including* its double quotes (plus any whitespace before `}`)
# Known limits: empty strings ("") and strings containing escaped quotes (\")
# do not match, so such objects are silently skipped by findall().
# Original author's note: this regex is most definitely *not* going to make my
# system blow up unexpectedly at some point in the future.
get_questions_and_answers_regex = re.compile(r'{\s*"question":\s*"([^"]+)",\s*"answer":\s*(\s*null\s*|"[^"]+"\s*)}')
def print_current_status(total_success, first_try_successes, second_try_successes, regex_successes):
    """Print a running tally of how the answers have been parsed so far.

    Args:
        total_success: number of rounds whose answers were parsed at all.
        first_try_successes: rounds parsed at the first attempt.
        second_try_successes: rounds parsed only after the model was asked to
            correct its own JSON.
        regex_successes: rounds recovered with the regex fallback.

    The report is written to stdout and is followed by one blank line.
    """
    print(
        f"so far, questions and answers were parsed successfully {total_success} times, of which\n"
        f"{first_try_successes} were successful at the first attempt,\n"
        f"{second_try_successes} were successful after asking the model to self-correct with a 'JSON expert' agent\n"
        f"and {regex_successes} were successful using a regex\n"
    )
def main():
    """Measure how often the model's answers end up as parseable JSON.

    Reads ``questions.txt``, ``system_prompt.txt`` and
    ``verification_prompt.txt`` from the current directory, asks the user for
    a task description, and then loops. Each round:

    1. sends the description and the verification prompt to the model;
    2. asks the model, under a "JSON expert" system message, to fix its own
       output, and tries to parse the result;
    3. on failure, sends the parser error back to the model and tries again;
    4. on a second failure, falls back to the question/answer regex.

    A running tally is printed after every successful round. The loop stops
    the first time all three strategies fail.
    """
    with open("questions.txt") as questions_file:
        questions = questions_file.read()
    with open("system_prompt.txt") as system_prompt_file:
        system_prompt = system_prompt_file.read()
    with open("verification_prompt.txt") as verification_prompt_file:
        verification_prompt = verification_prompt_file.read()
    verification_prompt = verification_prompt.replace("{questions}", questions)

    user_prompt = input(f"Please describe what you need to do. To get the best results "
                        f"try to answer the following questions:\n{questions}\n\n>")
    ollama_repository = OllamaRepository("llama3.1", system_prompt,
                                         ModelRoles("system", "user", "assistant"))

    # The loop below overwrites system_msg with the JSON-expert persona, so
    # remember what the repository started with and put it back each round.
    # NOTE(review): assumes `system_msg` is the attribute the repository sends
    # as the system prompt; falls back to system_prompt if it is not set yet.
    generation_system_msg = getattr(ollama_repository, "system_msg", system_prompt)

    # The regex fallback only counts as a success when it recovers this many
    # question/answer pairs (presumably the number of questions in
    # questions.txt - confirm).
    expected_answer_count = 10

    successful = 0
    corrected_json_was_ok = 0
    regex_got_all_answers = 0
    successful_at_first_attempt = 0

    while True:
        ollama_repository.system_msg = generation_system_msg
        ollama_repository.send_prompt(f"Ingest the following information: {user_prompt}")
        answer = ollama_repository.send_prompt(verification_prompt)

        ollama_repository.system_msg = "You are an expert at JSON format, and can detect and fix errors in JSON strings"
        fixed_json = ollama_repository.send_prompt(
            f"Verify if this is legal JSON and correct any mistake, "
            f"output just the fixed json without adding anything else {answer['content']}")
        answers = fixed_json["content"]

        # Attempt 1: the self-checked reply parses as-is.
        try:
            json.loads(answers, strict=False)
        except JSONDecodeError as e:
            # `e` is unbound once the except block ends, so keep its message.
            parse_error = str(e)
        else:
            print(answers)
            successful_at_first_attempt += 1
            successful += 1
            print_current_status(successful, successful_at_first_attempt, corrected_json_was_ok,
                                 regex_got_all_answers)
            continue

        # Attempt 2: hand the parser error back to the model.
        print("there was a problem in this json, asking the LLM to fix it passing the exception error message")
        correction = ollama_repository.send_prompt(
            f"When parsing this json {answers} I got this error {parse_error}. "
            f"Fix this error and return just the corrected json without adding anything else")
        corrected_text = correction["content"]
        print("trying to parse the corrected json")
        try:
            # strict=False for parity with the first attempt.
            json.loads(corrected_text, strict=False)
        except JSONDecodeError:
            # Not fatal: fall through to the regex fallback below.
            pass
        else:
            corrected_json_was_ok += 1
            # This round was parsed too, so it belongs in the overall total.
            successful += 1
            print_current_status(successful, successful_at_first_attempt, corrected_json_was_ok,
                                 regex_got_all_answers)
            continue

        # Attempt 3: pull the question/answer pairs out with the regex.
        print("still error, going old school")
        regex_parse_result = {
            question: raw_answer
            for question, raw_answer in get_questions_and_answers_regex.findall(corrected_text)
        }
        if len(regex_parse_result) != expected_answer_count:
            print(f"unable to parse \n {answers}\n giving up")
            break

        print(f"I got all {expected_answer_count} answers apparently")
        successful += 1
        regex_got_all_answers += 1
        print_current_status(successful, successful_at_first_attempt, corrected_json_was_ok,
                             regex_got_all_answers)


if __name__ == '__main__':
    main()