Spaces:
Sleeping
Sleeping
Commit
·
7b864ba
1
Parent(s):
d015f0c
first implementation for LLM integration
Browse files- test.json +30 -10
- app.py +57 -11
- repository/ollama.py +3 -2
- system_prompt.txt +0 -1
- verification_prompt.txt +18 -4
test.json
CHANGED
@@ -1,7 +1,26 @@
|
|
1 |
-
|
2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
"question": "1) What do you need to do?",
|
4 |
-
"answer": "Pest control
|
5 |
},
|
6 |
{
|
7 |
"question": "2) In which community is the work taking place?",
|
@@ -9,7 +28,7 @@
|
|
9 |
},
|
10 |
{
|
11 |
"question": "3) In which building?",
|
12 |
-
"answer": "Sadaf"
|
13 |
},
|
14 |
{
|
15 |
"question": "4) In which unit/apartment number?",
|
@@ -17,19 +36,19 @@
|
|
17 |
},
|
18 |
{
|
19 |
"question": "5) Am I an owner or a tenant?",
|
20 |
-
"answer": "
|
21 |
},
|
22 |
{
|
23 |
"question": "6) In which date is the work taking place?",
|
24 |
-
"answer": "7/
|
25 |
},
|
26 |
{
|
27 |
"question": "7) In which date will the work finish?",
|
28 |
-
"answer":
|
29 |
},
|
30 |
{
|
31 |
"question": "8) What is my contact number?",
|
32 |
-
"answer":
|
33 |
},
|
34 |
{
|
35 |
"question": "9) What is the name of the contracting company?",
|
@@ -37,5 +56,6 @@
|
|
37 |
},
|
38 |
{
|
39 |
"question": "10) What is the contact number of the contracting company?",
|
40 |
-
"answer":
|
41 |
-
}
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
4 |
+
"$id": "https://www.enricorampazzo.tech/dam_helper.schema.json",
|
5 |
+
"title": "Answer",
|
6 |
+
"description": "an answer to a question",
|
7 |
+
"type": "array",
|
8 |
+
"items": {
|
9 |
+
"type": "object",
|
10 |
+
"properties": {
|
11 |
+
"question": {
|
12 |
+
"type": "string"
|
13 |
+
},
|
14 |
+
"answer": {
|
15 |
+
"type": ["string", "null"]
|
16 |
+
}
|
17 |
+
},
|
18 |
+
"required": ["question", "answer"]
|
19 |
+
}
|
20 |
+
},
|
21 |
+
{
|
22 |
"question": "1) What do you need to do?",
|
23 |
+
"answer": "Pest control"
|
24 |
},
|
25 |
{
|
26 |
"question": "2) In which community is the work taking place?",
|
|
|
28 |
},
|
29 |
{
|
30 |
"question": "3) In which building?",
|
31 |
+
"answer": "Sadaf 5"
|
32 |
},
|
33 |
{
|
34 |
"question": "4) In which unit/apartment number?",
|
|
|
36 |
},
|
37 |
{
|
38 |
"question": "5) Am I an owner or a tenant?",
|
39 |
+
"answer": "unclear"
|
40 |
},
|
41 |
{
|
42 |
"question": "6) In which date is the work taking place?",
|
43 |
+
"answer": "7/9/2024"
|
44 |
},
|
45 |
{
|
46 |
"question": "7) In which date will the work finish?",
|
47 |
+
"answer": null
|
48 |
},
|
49 |
{
|
50 |
"question": "8) What is my contact number?",
|
51 |
+
"answer": null
|
52 |
},
|
53 |
{
|
54 |
"question": "9) What is the name of the contracting company?",
|
|
|
56 |
},
|
57 |
{
|
58 |
"question": "10) What is the contact number of the contracting company?",
|
59 |
+
"answer": null
|
60 |
+
}
|
61 |
+
]
|
app.py
CHANGED
@@ -1,9 +1,22 @@
|
|
1 |
import json
|
|
|
2 |
from json import JSONDecodeError
|
3 |
|
4 |
from repository.ollama import OllamaRepository
|
5 |
from schema import ModelRoles
|
6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
if __name__ == '__main__':
|
8 |
with open("questions.txt") as questions_file:
|
9 |
questions = questions_file.read()
|
@@ -17,16 +30,49 @@ if __name__ == '__main__':
|
|
17 |
|
18 |
ollama_repository = OllamaRepository("llama3.1", system_prompt,
|
19 |
ModelRoles("system", "user", "assistant"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
|
21 |
-
ollama_repository.send_prompt(f"Ingest the following information: {user_prompt}")
|
22 |
-
answer = ollama_repository.send_prompt(verification_prompt)
|
23 |
-
json_data_start = answer["content"].index("{")
|
24 |
-
json_data_stop = answer["content"].rindex("}")
|
25 |
-
|
26 |
-
answers = "[" + answer["content"][json_data_start:json_data_stop+1] + "]"
|
27 |
-
try:
|
28 |
-
json_answers = json.loads(answers)
|
29 |
-
print(json_answers)
|
30 |
-
except JSONDecodeError as e:
|
31 |
-
print(f"unable to parse \n {answers}")
|
32 |
|
|
|
1 |
import json
|
2 |
+
import re
|
3 |
from json import JSONDecodeError
|
4 |
|
5 |
from repository.ollama import OllamaRepository
|
6 |
from schema import ModelRoles
|
7 |
|
8 |
+
# Last-resort extractor used when the model's output cannot be parsed as JSON.
# It pulls (question, answer) pairs out of objects shaped like
#   {"question": "...", "answer": "..."}   or   {"question": "...", "answer": null}
# Group 1 is the question text without its quotes; group 2 is the raw answer
# token (either `null` or the string literal including its quotes, possibly
# with surrounding whitespace).
# The string sub-patterns accept backslash escapes (e.g. \") and the answer may
# be an empty string, so a quoted word or a blank answer no longer drops the
# whole pair. The pattern is still a heuristic: it only recognises objects
# whose keys appear in exactly this order.
get_questions_and_answers_regex = re.compile(
    r'\{\s*"question":\s*"((?:[^"\\]|\\.)+)",\s*"answer":\s*(\s*null\s*|"(?:[^"\\]|\\.)*"\s*)\}'
)
|
10 |
+
|
11 |
+
|
12 |
+
def print_current_status(total_success, first_try_successes, second_try_successes, regex_successes):
    """Print a running summary of how often the model output could be parsed.

    Args:
        total_success: number of runs counted as successfully parsed.
        first_try_successes: runs whose output parsed at the first attempt.
        second_try_successes: runs that parsed after asking the model to
            correct its own JSON.
        regex_successes: runs recovered through the regex fallback.
    """
    # The trailing empty entry reproduces the blank line that ends the report.
    report_lines = (
        f"so far, questions and answers were parsed successfully {total_success} times, of which",
        f"{first_try_successes} were successful at the first attempt,",
        f"{second_try_successes} were successful after asking the model to self-correct with a 'JSON expert' agent",
        f"and {regex_successes} were successful using a regex",
        "",
    )
    print("\n".join(report_lines))
|
18 |
+
|
19 |
+
|
20 |
if __name__ == '__main__':
|
21 |
with open("questions.txt") as questions_file:
|
22 |
questions = questions_file.read()
|
|
|
30 |
|
31 |
ollama_repository = OllamaRepository("llama3.1", system_prompt,
|
32 |
ModelRoles("system", "user", "assistant"))
|
33 |
+
successful = 0
|
34 |
+
corrected_json_was_ok = 0
|
35 |
+
regex_got_all_answers = 0
|
36 |
+
successful_at_first_attempt = 0
|
37 |
+
|
38 |
+
while True:
|
39 |
+
ollama_repository.send_prompt(f"Ingest the following information: {user_prompt}")
|
40 |
+
answer = ollama_repository.send_prompt(verification_prompt)
|
41 |
+
ollama_repository.system_msg = "You are an expert at JSON format, and can detect and fix errors in JSON strings"
|
42 |
+
fixed_json = ollama_repository.send_prompt(f"Verify if this is legal JSON and correct any mistake, output just the fixed json without adding anything else {answer['content']}")
|
43 |
+
answers = fixed_json["content"]
|
44 |
+
try:
|
45 |
+
json_answers = json.loads(answers, strict=False)
|
46 |
+
print(answers)
|
47 |
+
successful_at_first_attempt +=1
|
48 |
+
successful+=1
|
49 |
+
print_current_status(successful, successful_at_first_attempt, corrected_json_was_ok, regex_got_all_answers)
|
50 |
+
except JSONDecodeError as e:
|
51 |
+
print("there was a problem in this json, asking the LLM to fix it passing the exception error message")
|
52 |
+
answer = (
|
53 |
+
ollama_repository.send_prompt(f"When parsing this json {answers} I got this error {str(e)}. "
|
54 |
+
f"Fix this error and return just the corrected json without adding anything else"))
|
55 |
+
|
56 |
+
print("trying to parse the corrected json")
|
57 |
+
try:
|
58 |
+
json_answers = json.loads(answer["content"])
|
59 |
+
corrected_json_was_ok+=1
|
60 |
+
print_current_status(successful, successful_at_first_attempt, corrected_json_was_ok,
|
61 |
+
regex_got_all_answers)
|
62 |
+
except JSONDecodeError as e:
|
63 |
+
print("still error, going old school")
|
64 |
+
regex_parse_result = {}
|
65 |
+
for match in get_questions_and_answers_regex.findall(answer["content"]):
|
66 |
+
question, answer = match
|
67 |
+
regex_parse_result[question] = answer
|
68 |
+
if len(regex_parse_result) == 10:
|
69 |
+
print("I got all 10 answers apparently")
|
70 |
+
successful+=1
|
71 |
+
regex_got_all_answers+=1
|
72 |
+
print_current_status(successful, successful_at_first_attempt, corrected_json_was_ok,
|
73 |
+
regex_got_all_answers)
|
74 |
+
else:
|
75 |
+
print(f"unable to parse \n {answers}\n giving up")
|
76 |
+
break
|
77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
|
repository/ollama.py
CHANGED
@@ -11,10 +11,11 @@ class OllamaRepository:
|
|
11 |
self.roles = roles
|
12 |
self.message_history: list[dict[str, str]] = [{"role": self.roles.system_role, "content": system_msg}]
|
13 |
|
14 |
-
def send_prompt(self, prompt):
|
15 |
options: Options = Options(temperature=0.1)
|
16 |
self.message_history.append({"role": self.roles.user_role, "content":prompt})
|
17 |
response = ollama.chat(self.model, self.message_history, options=options)
|
18 |
answer = {"role": self.roles.ai_role, "content": response["message"]["content"]}
|
19 |
-
|
|
|
20 |
return answer
|
|
|
11 |
self.roles = roles
|
12 |
self.message_history: list[dict[str, str]] = [{"role": self.roles.system_role, "content": system_msg}]
|
13 |
|
14 |
+
def send_prompt(self, prompt:str, add_to_history:bool = False) -> dict[str, str]:
    """Send ``prompt`` to the model as a user message and return its reply.

    The prompt is always appended to ``self.message_history`` before the
    request is made, so later calls see it as context. The reply is only
    appended when ``add_to_history`` is true; by default the history keeps
    the user turn without the matching assistant turn.

    Args:
        prompt: text sent with the user role.
        add_to_history: whether to also store the model's reply in the history.

    Returns:
        A message dict with the assistant role and the reply text under
        ``"content"``.
    """
    user_message = {"role": self.roles.user_role, "content": prompt}
    self.message_history.append(user_message)

    # Low temperature requested on every call.
    chat_options: Options = Options(temperature=0.1)
    response = ollama.chat(self.model, self.message_history, options=chat_options)

    reply = {"role": self.roles.ai_role, "content": response["message"]["content"]}
    if not add_to_history:
        return reply
    self.message_history.append(reply)
    return reply
|
system_prompt.txt
CHANGED
@@ -3,4 +3,3 @@ of this domain.
|
|
3 |
Keep in mind that 'JBR' is a community, and buildings are named after the following names followed by a number:
|
4 |
Murjan, Bahar, Shams, Amwaj, Sadaf
|
5 |
for example "Murjan 1"
|
6 |
-
Today is the 6th of August 2024
|
|
|
3 |
Keep in mind that 'JBR' is a community, and buildings are named after the following names followed by a number:
|
4 |
Murjan, Bahar, Shams, Amwaj, Sadaf
|
5 |
for example "Murjan 1"
|
|
verification_prompt.txt
CHANGED
@@ -2,12 +2,26 @@ Please tell if I answered the following questions:
|
|
2 |
|
3 |
{questions}
|
4 |
|
5 |
-
The answer should be a list of json objects
|
6 |
{
|
7 |
-
"
|
8 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
}
|
10 |
if the answer is a date format it as day/month/year.
|
11 |
If the answer contains temporal references such as 'tomorrow', 'in two days' etc. consider that today it is the
|
12 |
6th of september 2024.
|
13 |
-
If the answer is not provided
|
|
|
2 |
|
3 |
{questions}
|
4 |
|
5 |
+
The answer must be valid JSON: a list of JSON objects, each following this schema.
|
6 |
{
|
7 |
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
8 |
+
"$id": "https://www.enricorampazzo.tech/dam_helper.schema.json",
|
9 |
+
"title": "Answer",
|
10 |
+
"description": "an answer to a question",
|
11 |
+
"type": "object",
|
12 |
+
"properties": {
|
13 |
+
"question": {
|
14 |
+
"description": "the question being answered",
|
15 |
+
"type": "string"
|
16 |
+
},
|
17 |
+
"answer": {
|
18 |
+
"description": "the answer to the question",
|
19 |
+
"type": ["string", "null"]
|
20 |
+
|
21 |
+
}
|
22 |
+
}
|
23 |
}
|
24 |
If the answer is a date, format it as day/month/year.
|
25 |
If the answer contains temporal references such as 'tomorrow', 'in two days' etc., assume that today is the
|
26 |
6th of September 2024.
|
27 |
+
If the answer to a question is not provided, set only that question's "answer" value to null.
|