Shreyas094 committed on
Commit
c57dd32
·
verified ·
1 Parent(s): b52d39b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -16
app.py CHANGED
@@ -8,6 +8,7 @@ import random
8
  import urllib.parse
9
  import spacy
10
  import nltk
 
11
  from nltk.tokenize import sent_tokenize
12
  from typing import List, Dict
13
  from tempfile import NamedTemporaryFile
@@ -30,28 +31,36 @@ nltk.download('punkt')
30
 
31
  class Agent1:
32
  def __init__(self):
33
- pass
 
 
 
 
 
 
 
34
 
35
  def rephrase_and_split(self, user_input: str) -> List[str]:
36
- # Identify question words
37
- question_words = set(["what", "when", "where", "who", "whom", "which", "whose", "why", "how"])
38
-
39
- # Split sentences
40
- sentences = sent_tokenize(user_input)
41
-
42
- # Identify questions
43
  questions = []
44
- for sent in sentences:
45
- words = sent.lower().split()
46
- if words[0] in question_words or sent.strip().endswith('?'):
47
- questions.append(sent)
48
- elif any(word in question_words for word in words):
49
- questions.append(sent)
50
-
 
 
 
 
 
 
 
51
  # If no questions identified, return the original input
52
  if not questions:
53
  return [user_input]
54
-
55
  return questions
56
 
57
  def process(self, user_input: str) -> tuple[List[str], Dict[str, List[Dict[str, str]]]]:
 
8
  import urllib.parse
9
  import spacy
10
  import nltk
11
+ from nltk.tokenize import word_tokenize
12
  from nltk.tokenize import sent_tokenize
13
  from typing import List, Dict
14
  from tempfile import NamedTemporaryFile
 
31
 
32
  class Agent1:
33
  def __init__(self):
34
+ self.question_words = set(["what", "when", "where", "who", "whom", "which", "whose", "why", "how"])
35
+ self.conjunctions = set(["and", "or"])
36
+
37
def is_question(self, text: str) -> bool:
    """Return True when *text* looks like a question.

    A text counts as a question when it ends with ``?`` (ignoring
    surrounding whitespace) or when any of its lower-cased tokens is in
    ``self.question_words``.

    Empty or whitespace-only text returns False instead of raising
    ``IndexError``: the previous explicit ``words[0]`` check indexed into
    a possibly empty token list, and was redundant because a leading
    question word is already caught by the any-token check.
    """
    # Cheap check first: no need to tokenise when the text ends with '?'.
    if text.strip().endswith('?'):
        return True
    tokens = word_tokenize(text.lower())
    return any(token in self.question_words for token in tokens)
42
 
43
  def rephrase_and_split(self, user_input: str) -> List[str]:
44
+ words = word_tokenize(user_input)
 
 
 
 
 
 
45
  questions = []
46
+ current_question = []
47
+
48
+ for word in words:
49
+ if word.lower() in self.conjunctions and current_question:
50
+ if self.is_question(' '.join(current_question)):
51
+ questions.append(' '.join(current_question))
52
+ current_question = []
53
+ else:
54
+ current_question.append(word)
55
+
56
+ if current_question:
57
+ if self.is_question(' '.join(current_question)):
58
+ questions.append(' '.join(current_question))
59
+
60
  # If no questions identified, return the original input
61
  if not questions:
62
  return [user_input]
63
+
64
  return questions
65
 
66
  def process(self, user_input: str) -> tuple[List[str], Dict[str, List[Dict[str, str]]]]: