Sentinel-AI-Beta-Test

Sleeping

App Files Files Community

Shreyas094 committed on Jul 20, 2024

Commit

c57dd32

verified ·

1 Parent(s): b52d39b

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -16

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ import random
 import urllib.parse
 import spacy
 import nltk
 from nltk.tokenize import sent_tokenize
 from typing import List, Dict
 from tempfile import NamedTemporaryFile
@@ -30,28 +31,36 @@ nltk.download('punkt')
 class Agent1:
     def __init__(self):
-        pass
     def rephrase_and_split(self, user_input: str) -> List[str]:
-        # Identify question words
-        question_words = set(["what", "when", "where", "who", "whom", "which", "whose", "why", "how"])
-        # Split sentences
-        sentences = sent_tokenize(user_input)
-        # Identify questions
         questions = []
-        for sent in sentences:
-            words = sent.lower().split()
-            if words[0] in question_words or sent.strip().endswith('?'):
-                questions.append(sent)
-            elif any(word in question_words for word in words):
-                questions.append(sent)
         # If no questions identified, return the original input
         if not questions:
             return [user_input]
         return questions
     def process(self, user_input: str) -> tuple[List[str], Dict[str, List[Dict[str, str]]]]:

 import urllib.parse
 import spacy
 import nltk
+from nltk.tokenize import word_tokenize
 from nltk.tokenize import sent_tokenize
 from typing import List, Dict
 from tempfile import NamedTemporaryFile
 class Agent1:
     def __init__(self):
+        self.question_words = set(["what", "when", "where", "who", "whom", "which", "whose", "why", "how"])
+        self.conjunctions = set(["and", "or"])
+    def is_question(self, text: str) -> bool:
+        words = word_tokenize(text.lower())
+        return (words[0] in self.question_words or
+                text.strip().endswith('?') or
+                any(word in self.question_words for word in words))
     def rephrase_and_split(self, user_input: str) -> List[str]:
         """Split *user_input* on conjunctions and keep the question parts.

         The input is tokenized and cut at every token found in
         ``self.conjunctions`` (the conjunction itself is dropped), provided
         some tokens have already been collected for the current part. A
         conjunction seen while the current part is still empty is kept as an
         ordinary token. Each part is re-joined with single spaces and kept
         only if ``self.is_question`` accepts it.

         Returns the accepted parts in order, or ``[user_input]`` unchanged
         when no part qualifies as a question.
         """
         segments = [[]]
         for token in word_tokenize(user_input):
             if token.lower() in self.conjunctions and segments[-1]:
                 # Close the current part and start a fresh one.
                 segments.append([])
             else:
                 segments[-1].append(token)

         # Only the trailing part can be empty; skip it if so.
         candidates = (' '.join(tokens) for tokens in segments if tokens)
         found = [part for part in candidates if self.is_question(part)]

         # If no questions identified, return the original input
         return found if found else [user_input]
     def process(self, user_input: str) -> tuple[List[str], Dict[str, List[Dict[str, str]]]]: