Spaces:

enricorampazzo
/

DAMHelper

Sleeping

App Files Files Community

enricorampazzo committed on Sep 7, 2024

Commit

7b864ba

1 Parent(s): d015f0c

first implementation for LLM integration

Browse files

Files changed (5) hide show

test.json +30 -10
app.py +57 -11
repository/ollama.py +3 -2
system_prompt.txt +0 -1
verification_prompt.txt +18 -4

test.json CHANGED Viewed

@@ -1,7 +1,26 @@
- [{
     "question": "1) What do you need to do?",
-    "answer": "Pest control treatment"
   },
   {
     "question": "2) In which community is the work taking place?",
@@ -9,7 +28,7 @@
   },
   {
     "question": "3) In which building?",
-    "answer": "Sadaf"
   },
   {
     "question": "4) In which unit/apartment number?",
@@ -17,19 +36,19 @@
   },
   {
     "question": "5) Am I an owner or a tenant?",
-    "answer": "Owner"
   },
   {
     "question": "6) In which date is the work taking place?",
-    "answer": "7/8/2024"
   },
   {
     "question": "7) In which date will the work finish?",
-    "answer": "Unclear" (assuming it's a one-day job, but not specified)
   },
   {
     "question": "8) What is my contact number?",
-    "answer": "Unclear"
   },
   {
     "question": "9) What is the name of the contracting company?",
@@ -37,5 +56,6 @@
   },
   {
     "question": "10) What is the contact number of the contracting company?",
-    "answer": "Unclear"
-  }]

+[
+  {
+    "$schema": "https://json-schema.org/draft/2020-12/schema",
+    "$id": "https://www.enricorampazzo.tech/dam_helper.schema.json",
+    "title": "Answer",
+    "description": "an answer to a question",
+    "type": "array",
+    "items": {
+      "type": "object",
+      "properties": {
+        "question": {
+          "type": "string"
+        },
+        "answer": {
+          "type": ["string", "null"]
+        }
+      },
+      "required": ["question", "answer"]
+    }
+  },
+  {
     "question": "1) What do you need to do?",
+    "answer": "Pest control"
   },
   {
     "question": "2) In which community is the work taking place?",
   },
   {
     "question": "3) In which building?",
+    "answer": "Sadaf 5"
   },
   {
     "question": "4) In which unit/apartment number?",
   },
   {
     "question": "5) Am I an owner or a tenant?",
+    "answer": "unclear"
   },
   {
     "question": "6) In which date is the work taking place?",
+    "answer": "7/9/2024"
   },
   {
     "question": "7) In which date will the work finish?",
+    "answer": null
   },
   {
     "question": "8) What is my contact number?",
+    "answer": null
   },
   {
     "question": "9) What is the name of the contracting company?",
   },
   {
     "question": "10) What is the contact number of the contracting company?",
+    "answer": null
+  }
+]

app.py CHANGED Viewed

@@ -1,9 +1,22 @@
 import json
 from json import JSONDecodeError
 from repository.ollama import OllamaRepository
 from schema import ModelRoles
 if __name__ == '__main__':
     with open("questions.txt") as questions_file:
         questions = questions_file.read()
@@ -17,16 +30,49 @@ if __name__ == '__main__':
     ollama_repository = OllamaRepository("llama3.1", system_prompt,
                                          ModelRoles("system", "user", "assistant"))
-    ollama_repository.send_prompt(f"Ingest the following information: {user_prompt}")
-    answer = ollama_repository.send_prompt(verification_prompt)
-    json_data_start = answer["content"].index("{")
-    json_data_stop = answer["content"].rindex("}")
-    answers = "[" + answer["content"][json_data_start:json_data_stop+1] + "]"
-    try:
-        json_answers = json.loads(answers)
-        print(json_answers)
-    except JSONDecodeError as e:
-        print(f"unable to parse \n {answers}")

 import json
+import re
 from json import JSONDecodeError
 from repository.ollama import OllamaRepository
 from schema import ModelRoles
+# this regex is most definitely *not* going to make my system blow up unexpectedly at some point in the future
+get_questions_and_answers_regex = re.compile(r'{\s*"question":\s*"([^"]+)",\s*"answer":\s*(\s*null\s*|"[^"]+"\s*)}')
def print_current_status(total_success, first_try_successes, second_try_successes, regex_successes):
    """Print a running tally of how many times the model's answers were parsed.

    Args:
        total_success: overall number of successful parses so far.
        first_try_successes: parses that succeeded at the first attempt.
        second_try_successes: parses that succeeded after asking the model to
            self-correct its JSON.
        regex_successes: parses that were recovered with the fallback regex.

    Returns:
        None. The report is written to standard output.
    """
    # Adjacent f-strings instead of one indented triple-quoted literal: the
    # source indentation of the continuation lines used to end up in the output.
    print(
        f"so far, questions and answers were parsed successfully {total_success} times, of which\n"
        f"{first_try_successes} were successful at the first attempt,\n"
        f"{second_try_successes} were successful after asking the model to self-correct with a 'JSON expert' agent\n"
        f"and {regex_successes} were successful using a regex\n"
    )
 if __name__ == '__main__':
     with open("questions.txt") as questions_file:
         questions = questions_file.read()
     ollama_repository = OllamaRepository("llama3.1", system_prompt,
                                          ModelRoles("system", "user", "assistant"))
+    successful = 0
+    corrected_json_was_ok = 0
+    regex_got_all_answers = 0
+    successful_at_first_attempt = 0
+    while True:
+        ollama_repository.send_prompt(f"Ingest the following information: {user_prompt}")
+        answer = ollama_repository.send_prompt(verification_prompt)
+        ollama_repository.system_msg = "You are an expert at JSON format, and can detect and fix errors in JSON strings"
+        fixed_json = ollama_repository.send_prompt(f"Verify if this is legal JSON and correct any mistake, output just the fixed json without adding anything else {answer['content']}")
+        answers = fixed_json["content"]
+        try:
+            json_answers = json.loads(answers, strict=False)
+            print(answers)
+            successful_at_first_attempt +=1
+            successful+=1
+            print_current_status(successful, successful_at_first_attempt, corrected_json_was_ok, regex_got_all_answers)
+        except JSONDecodeError as e:
+            print("there was a problem in this json, asking the LLM to fix it passing the exception error message")
+            answer = (
+                ollama_repository.send_prompt(f"When parsing this json {answers} I got this error {str(e)}. "
+                                              f"Fix this error and return just the corrected json without adding anything else"))
+            print("trying to parse the corrected json")
+            try:
+                json_answers = json.loads(answer["content"])
+                corrected_json_was_ok+=1
+                print_current_status(successful, successful_at_first_attempt, corrected_json_was_ok,
+                                     regex_got_all_answers)
+            except JSONDecodeError as e:
+                print("still error, going old school")
+                regex_parse_result = {}
+                for match in get_questions_and_answers_regex.findall(answer["content"]):
+                    question, answer = match
+                    regex_parse_result[question] = answer
+                if len(regex_parse_result) == 10:
+                    print("I got all 10 answers apparently")
+                    successful+=1
+                    regex_got_all_answers+=1
+                    print_current_status(successful, successful_at_first_attempt, corrected_json_was_ok,
+                                         regex_got_all_answers)
+                else:
+                    print(f"unable to parse \n {answers}\n giving up")
+                    break

repository/ollama.py CHANGED Viewed

@@ -11,10 +11,11 @@ class OllamaRepository:
         self.roles = roles
         self.message_history: list[dict[str, str]] = [{"role": self.roles.system_role, "content": system_msg}]
-    def send_prompt(self, prompt):
         options: Options = Options(temperature=0.1)
         self.message_history.append({"role": self.roles.user_role, "content":prompt})
         response = ollama.chat(self.model, self.message_history, options=options)
         answer = {"role": self.roles.ai_role, "content": response["message"]["content"]}
-        self.message_history.append(answer)
         return answer

         self.roles = roles
         self.message_history: list[dict[str, str]] = [{"role": self.roles.system_role, "content": system_msg}]
+    def send_prompt(self, prompt:str, add_to_history:bool = False) -> dict[str, str]:
         options: Options = Options(temperature=0.1)
         self.message_history.append({"role": self.roles.user_role, "content":prompt})
         response = ollama.chat(self.model, self.message_history, options=options)
         answer = {"role": self.roles.ai_role, "content": response["message"]["content"]}
+        if add_to_history:
+            self.message_history.append(answer)
         return answer

system_prompt.txt CHANGED Viewed

@@ -3,4 +3,3 @@ of this domain.
 Keep in mind that 'JBR' is a community, and buildings are named after the following names followed by a number:
 Murjan, Bahar, Shams, Amwaj, Sadaf
 for example "Murjan 1"
-Today is the 6th of August 2024

 Keep in mind that 'JBR' is a community, and buildings are named after the following names followed by a number:
 Murjan, Bahar, Shams, Amwaj, Sadaf
 for example "Murjan 1"

verification_prompt.txt CHANGED Viewed

@@ -2,12 +2,26 @@ Please tell if I answered the following questions:
 {questions}
-The answer should be a list of json objects formatted as follows:
 {
-"question": "<question>"
-"answer": "<answer>"
 }
 if the answer is a date format it as day/month/year.
 If the answer contains temporal references such as 'tomorrow', 'in two days' etc. consider that today it is the
 6th of september 2024.
-If the answer is not provided say "Unclear"

 {questions}
+The answer should be a list of JSON objects that follow this schema, and it must be legal JSON.
 {
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://www.enricorampazzo.tech/dam_helper.schema.json",
+  "title": "Answer",
+  "description": "an answer to a question",
+  "type": "object",
+  "properties": {
+    "question": {
+        "description": "the question being answered",
+        "type": "string"
+    },
+    "answer": {
+        "description": "the answer to the question",
+        "type": ["string", "null"]
+    }
+  }
 }
 if the answer is a date format it as day/month/year.
 If the answer contains temporal references such as 'tomorrow', 'in two days' etc. consider that today it is the
 6th of september 2024.
+If the answer is not provided, the value of "answer" should be null