File size: 4,085 Bytes
d015f0c
7b864ba
d015f0c
 
 
 
8d06b39
7b864ba
 
 
 
 
 
 
 
 
 
 
 
8d06b39
d015f0c
 
 
 
 
 
 
 
 
 
 
 
7b864ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d015f0c
8d06b39
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import json
import re
from json import JSONDecodeError

from repository.ollama import OllamaRepository
from schema import ModelRoles

# this regex is most definitely *not* going to make my system blow up unexpectedly at some point in the future
#
# Last-resort extractor used when json.loads cannot parse the model output.
# It matches one flat object with the keys in exactly this order:
#     {"question": "<text>", "answer": null}
#     {"question": "<text>", "answer": "<text>"}
# Capture groups (as returned by findall, one 2-tuple per match):
#     1 -> the question text, without its surrounding quotes
#     2 -> the raw answer token: either null or the answer *still wrapped in its
#          double quotes*, plus any whitespace matched before the closing brace
# Visible limitations: both texts must be non-empty and must not contain a
# double quote (an escaped \" ends the match early), and no whitespace is
# accepted between a key and its colon or between the question and the comma.
get_questions_and_answers_regex = re.compile(r'{\s*"question":\s*"([^"]+)",\s*"answer":\s*(\s*null\s*|"[^"]+"\s*)}')


def print_current_status(total_success, first_try_successes, second_try_successes, regex_successes):
    """Print the running tally of successful parses, broken down by strategy.

    Args:
        total_success: overall number of successful parses to report.
        first_try_successes: parses that succeeded at the first attempt.
        second_try_successes: parses that succeeded after the model was asked
            to self-correct.
        regex_successes: parses that were recovered with the regex.
    """
    # The layout is part of the emitted text and is kept as-is: continuation
    # lines are indented by 14 spaces, three lines end with a trailing space,
    # and the message closes with a line made of 10 spaces.
    continuation_indent = " " * 14
    report_lines = [
        f"so far, questions and answers were parsed successfully {total_success} times, of which ",
        f"{continuation_indent}{first_try_successes} were successful at the first attempt, ",
        f"{continuation_indent}{second_try_successes} were successful after asking the model "
        f"to self-correct with a 'JSON expert' agent ",
        f"{continuation_indent}and {regex_successes} were successful using a regex",
        " " * 10,
    ]
    print("\n".join(report_lines))


if __name__ == '__main__':
    # Experiment driver. It sends the same user description to the model over
    # and over, asks for the answers as JSON, and tallies which of three
    # strategies managed to parse each response:
    #   1. json.loads on the output of a "JSON expert" clean-up prompt,
    #   2. json.loads after asking the model to fix the reported parse error,
    #   3. the question/answer regex.
    # The loop only stops at the first response that none of them can parse.
    with open("questions.txt") as questions_file:
        questions = questions_file.read()
    with open("system_prompt.txt") as system_prompt_file:
        system_prompt = system_prompt_file.read()
    with open("verification_prompt.txt") as verification_prompt_file:
        verification_prompt = verification_prompt_file.read()
    # The verification prompt is a template with a literal "{questions}" slot.
    verification_prompt = verification_prompt.replace("{questions}", questions)
    user_prompt = input("Please describe what you need to do. To get the best results "
                        f"try to answer the following questions:\n{questions}\n\n>")

    ollama_repository = OllamaRepository("llama3.1", system_prompt,
                                         ModelRoles("system", "user", "assistant"))

    # The regex fallback only counts as a success when it recovers this many
    # distinct questions.
    # NOTE(review): presumably the number of questions in questions.txt - confirm.
    EXPECTED_ANSWER_COUNT = 10

    successful = 0
    corrected_json_was_ok = 0
    regex_got_all_answers = 0
    # "First attempt" here already includes one pass through the JSON-expert
    # clean-up prompt below.
    successful_at_first_attempt = 0

    while True:
        # Undo the JSON-expert system message set later in the previous
        # iteration, so every iteration ingests the user text under the same
        # system prompt as the first one did.
        # NOTE(review): assumes the repository reads its system prompt from
        # `system_msg` (the attribute this loop already overwrites) - confirm
        # against OllamaRepository.
        ollama_repository.system_msg = system_prompt
        ollama_repository.send_prompt(f"Ingest the following information: {user_prompt}")
        answer = ollama_repository.send_prompt(verification_prompt)
        ollama_repository.system_msg = "You are an expert at JSON format, and can detect and fix errors in JSON strings"
        fixed_json = ollama_repository.send_prompt(f"Verify if this is legal JSON and correct any mistake, output just the fixed json without adding anything else {answer['content']}")
        answers = fixed_json["content"]
        try:
            # strict=False lets the decoder accept raw control characters
            # (such as newlines) inside JSON strings.
            json.loads(answers, strict=False)
        except JSONDecodeError as e:
            print("there was a problem in this json, asking the LLM to fix it passing the exception error message")
            correction = ollama_repository.send_prompt(
                f"When parsing this json {answers} I got this error {e}. "
                f"Fix this error and return just the corrected json without adding anything else")
            corrected_json = correction["content"]

            print("trying to parse the corrected json")
            try:
                # Same leniency as the first attempt, so the retry is not
                # rejected for something the first parse would have accepted.
                json.loads(corrected_json, strict=False)
            except JSONDecodeError:
                print("still error, going old school")
                # findall yields (question, raw answer) pairs; a repeated
                # question keeps its last answer.
                regex_parse_result = {
                    question_text: answer_text
                    for question_text, answer_text
                    in get_questions_and_answers_regex.findall(corrected_json)
                }
                if len(regex_parse_result) != EXPECTED_ANSWER_COUNT:
                    print(f"unable to parse \n {answers}\n giving up")
                    break
                print(f"I got all {EXPECTED_ANSWER_COUNT} answers apparently")
                successful += 1
                regex_got_all_answers += 1
                print_current_status(successful, successful_at_first_attempt, corrected_json_was_ok,
                                     regex_got_all_answers)
            else:
                # A self-corrected parse is a success too: count it in the
                # total so the total stays the sum of the three breakdowns.
                successful += 1
                corrected_json_was_ok += 1
                print_current_status(successful, successful_at_first_attempt, corrected_json_was_ok,
                                     regex_got_all_answers)
        else:
            print(answers)
            successful_at_first_attempt += 1
            successful += 1
            print_current_status(successful, successful_at_first_attempt, corrected_json_was_ok,
                                 regex_got_all_answers)