enricorampazzo committed on
Commit
7b864ba
·
1 Parent(s): d015f0c

first implementation for LLM integration

Browse files
Files changed (5) hide show
  1. test.json +30 -10
  2. app.py +57 -11
  3. repository/ollama.py +3 -2
  4. system_prompt.txt +0 -1
  5. verification_prompt.txt +18 -4
test.json CHANGED
@@ -1,7 +1,26 @@
1
-
2
- [{
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  "question": "1) What do you need to do?",
4
- "answer": "Pest control treatment"
5
  },
6
  {
7
  "question": "2) In which community is the work taking place?",
@@ -9,7 +28,7 @@
9
  },
10
  {
11
  "question": "3) In which building?",
12
- "answer": "Sadaf"
13
  },
14
  {
15
  "question": "4) In which unit/apartment number?",
@@ -17,19 +36,19 @@
17
  },
18
  {
19
  "question": "5) Am I an owner or a tenant?",
20
- "answer": "Owner"
21
  },
22
  {
23
  "question": "6) In which date is the work taking place?",
24
- "answer": "7/8/2024"
25
  },
26
  {
27
  "question": "7) In which date will the work finish?",
28
- "answer": "Unclear" (assuming it's a one-day job, but not specified)
29
  },
30
  {
31
  "question": "8) What is my contact number?",
32
- "answer": "Unclear"
33
  },
34
  {
35
  "question": "9) What is the name of the contracting company?",
@@ -37,5 +56,6 @@
37
  },
38
  {
39
  "question": "10) What is the contact number of the contracting company?",
40
- "answer": "Unclear"
41
- }]
 
 
1
+ [
2
+ {
3
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
4
+ "$id": "https://www.enricorampazzo.tech/dam_helper.schema.json",
5
+ "title": "Answer",
6
+ "description": "an answer to a question",
7
+ "type": "array",
8
+ "items": {
9
+ "type": "object",
10
+ "properties": {
11
+ "question": {
12
+ "type": "string"
13
+ },
14
+ "answer": {
15
+ "type": ["string", "null"]
16
+ }
17
+ },
18
+ "required": ["question", "answer"]
19
+ }
20
+ },
21
+ {
22
  "question": "1) What do you need to do?",
23
+ "answer": "Pest control"
24
  },
25
  {
26
  "question": "2) In which community is the work taking place?",
 
28
  },
29
  {
30
  "question": "3) In which building?",
31
+ "answer": "Sadaf 5"
32
  },
33
  {
34
  "question": "4) In which unit/apartment number?",
 
36
  },
37
  {
38
  "question": "5) Am I an owner or a tenant?",
39
+ "answer": "unclear"
40
  },
41
  {
42
  "question": "6) In which date is the work taking place?",
43
+ "answer": "7/9/2024"
44
  },
45
  {
46
  "question": "7) In which date will the work finish?",
47
+ "answer": null
48
  },
49
  {
50
  "question": "8) What is my contact number?",
51
+ "answer": null
52
  },
53
  {
54
  "question": "9) What is the name of the contracting company?",
 
56
  },
57
  {
58
  "question": "10) What is the contact number of the contracting company?",
59
+ "answer": null
60
+ }
61
+ ]
app.py CHANGED
@@ -1,9 +1,22 @@
1
  import json
 
2
  from json import JSONDecodeError
3
 
4
  from repository.ollama import OllamaRepository
5
  from schema import ModelRoles
6
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  if __name__ == '__main__':
8
  with open("questions.txt") as questions_file:
9
  questions = questions_file.read()
@@ -17,16 +30,49 @@ if __name__ == '__main__':
17
 
18
  ollama_repository = OllamaRepository("llama3.1", system_prompt,
19
  ModelRoles("system", "user", "assistant"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
- ollama_repository.send_prompt(f"Ingest the following information: {user_prompt}")
22
- answer = ollama_repository.send_prompt(verification_prompt)
23
- json_data_start = answer["content"].index("{")
24
- json_data_stop = answer["content"].rindex("}")
25
-
26
- answers = "[" + answer["content"][json_data_start:json_data_stop+1] + "]"
27
- try:
28
- json_answers = json.loads(answers)
29
- print(json_answers)
30
- except JSONDecodeError as e:
31
- print(f"unable to parse \n {answers}")
32
 
 
1
  import json
2
+ import re
3
  from json import JSONDecodeError
4
 
5
  from repository.ollama import OllamaRepository
6
  from schema import ModelRoles
7
 
8
+ # this regex is most definitely *not* going to make my system blow up unexpectedly at some point in the future
9
+ get_questions_and_answers_regex = re.compile(r'{\s*"question":\s*"([^"]+)",\s*"answer":\s*(\s*null\s*|"[^"]+"\s*)}')
10
+
11
+
12
+ def print_current_status(total_success, first_try_successes, second_try_successes, regex_successes):
13
+ print(f"""so far, questions and answers were parsed successfully {total_success} times, of which
14
+ {first_try_successes} were successful at the first attempt,
15
+ {second_try_successes} were successful after asking the model to self-correct with a 'JSON expert' agent
16
+ and {regex_successes} were successful using a regex
17
+ """)
18
+
19
+
20
  if __name__ == '__main__':
21
  with open("questions.txt") as questions_file:
22
  questions = questions_file.read()
 
30
 
31
  ollama_repository = OllamaRepository("llama3.1", system_prompt,
32
  ModelRoles("system", "user", "assistant"))
33
+ successful = 0
34
+ corrected_json_was_ok = 0
35
+ regex_got_all_answers = 0
36
+ successful_at_first_attempt = 0
37
+
38
+ while True:
39
+ ollama_repository.send_prompt(f"Ingest the following information: {user_prompt}")
40
+ answer = ollama_repository.send_prompt(verification_prompt)
41
+ ollama_repository.system_msg = "You are an expert at JSON format, and can detect and fix errors in JSON strings"
42
+ fixed_json = ollama_repository.send_prompt(f"Verify if this is legal JSON and correct any mistake, output just the fixed json without adding anything else {answer['content']}")
43
+ answers = fixed_json["content"]
44
+ try:
45
+ json_answers = json.loads(answers, strict=False)
46
+ print(answers)
47
+ successful_at_first_attempt +=1
48
+ successful+=1
49
+ print_current_status(successful, successful_at_first_attempt, corrected_json_was_ok, regex_got_all_answers)
50
+ except JSONDecodeError as e:
51
+ print("there was a problem in this json, asking the LLM to fix it passing the exception error message")
52
+ answer = (
53
+ ollama_repository.send_prompt(f"When parsing this json {answers} I got this error {str(e)}. "
54
+ f"Fix this error and return just the corrected json without adding anything else"))
55
+
56
+ print("trying to parse the corrected json")
57
+ try:
58
+ json_answers = json.loads(answer["content"])
59
+ corrected_json_was_ok+=1
60
+ print_current_status(successful, successful_at_first_attempt, corrected_json_was_ok,
61
+ regex_got_all_answers)
62
+ except JSONDecodeError as e:
63
+ print("still error, going old school")
64
+ regex_parse_result = {}
65
+ for match in get_questions_and_answers_regex.findall(answer["content"]):
66
+ question, answer = match
67
+ regex_parse_result[question] = answer
68
+ if len(regex_parse_result) == 10:
69
+ print("I got all 10 answers apparently")
70
+ successful+=1
71
+ regex_got_all_answers+=1
72
+ print_current_status(successful, successful_at_first_attempt, corrected_json_was_ok,
73
+ regex_got_all_answers)
74
+ else:
75
+ print(f"unable to parse \n {answers}\n giving up")
76
+ break
77
 
 
 
 
 
 
 
 
 
 
 
 
78
 
repository/ollama.py CHANGED
@@ -11,10 +11,11 @@ class OllamaRepository:
11
  self.roles = roles
12
  self.message_history: list[dict[str, str]] = [{"role": self.roles.system_role, "content": system_msg}]
13
 
14
- def send_prompt(self, prompt):
15
  options: Options = Options(temperature=0.1)
16
  self.message_history.append({"role": self.roles.user_role, "content":prompt})
17
  response = ollama.chat(self.model, self.message_history, options=options)
18
  answer = {"role": self.roles.ai_role, "content": response["message"]["content"]}
19
- self.message_history.append(answer)
 
20
  return answer
 
11
  self.roles = roles
12
  self.message_history: list[dict[str, str]] = [{"role": self.roles.system_role, "content": system_msg}]
13
 
14
+ def send_prompt(self, prompt:str, add_to_history:bool = False) -> dict[str, str]:
15
  options: Options = Options(temperature=0.1)
16
  self.message_history.append({"role": self.roles.user_role, "content":prompt})
17
  response = ollama.chat(self.model, self.message_history, options=options)
18
  answer = {"role": self.roles.ai_role, "content": response["message"]["content"]}
19
+ if add_to_history:
20
+ self.message_history.append(answer)
21
  return answer
system_prompt.txt CHANGED
@@ -3,4 +3,3 @@ of this domain.
3
  Keep in mind that 'JBR' is a community, and buildings are named after the following names followed by a number:
4
  Murjan, Bahar, Shams, Amwaj, Sadaf
5
  for example "Murjan 1"
6
- Today is the 6th of August 2024
 
3
  Keep in mind that 'JBR' is a community, and buildings are named after the following names followed by a number:
4
  Murjan, Bahar, Shams, Amwaj, Sadaf
5
  for example "Murjan 1"
 
verification_prompt.txt CHANGED
@@ -2,12 +2,26 @@ Please tell if I answered the following questions:
2
 
3
  {questions}
4
 
5
- The answer should be a list of json objects formatted as follows:
6
  {
7
- "question": "<question>"
8
- "answer": "<answer>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  }
10
  if the answer is a date format it as day/month/year.
11
  If the answer contains temporal references such as 'tomorrow', 'in two days' etc. consider that today it is the
12
  6th of September 2024.
13
- If the answer is not provided say "Unclear"
 
2
 
3
  {questions}
4
 
5
+ The answer should be a list of JSON objects that follow this schema, and it must be legal JSON.
6
  {
7
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
8
+ "$id": "https://www.enricorampazzo.tech/dam_helper.schema.json",
9
+ "title": "Answer",
10
+ "description": "an answer to a question",
11
+ "type": "object",
12
+ "properties": {
13
+ "question": {
14
+ "description": "the question being answered",
15
+ "type": "string"
16
+ },
17
+ "answer": {
18
+ "description": "the answer to the question",
19
+ "type": "string"
20
+
21
+ }
22
+ }
23
  }
24
  if the answer is a date format it as day/month/year.
25
  If the answer contains temporal references such as 'tomorrow', 'in two days' etc. consider that today it is the
26
  6th of September 2024.
27
+ If the answer is not provided, the value of "answer" should be null