Shreyas094 committed on
Commit
bb53ca6
·
verified ·
1 Parent(s): 63c6f0e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -0
app.py CHANGED
@@ -28,11 +28,13 @@ huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
28
 
29
  # Download necessary NLTK data
30
  nltk.download('punkt')
 
31
 
32
  class Agent1:
33
  def __init__(self):
34
  self.question_words = set(["what", "when", "where", "who", "whom", "which", "whose", "why", "how"])
35
  self.conjunctions = set(["and", "or"])
 
36
 
37
  def is_question(self, text: str) -> bool:
38
  words = word_tokenize(text.lower())
@@ -40,6 +42,30 @@ class Agent1:
40
  text.strip().endswith('?') or
41
  any(word in self.question_words for word in words))
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  def rephrase_and_split(self, user_input: str) -> List[str]:
44
  words = word_tokenize(user_input)
45
  questions = []
@@ -61,6 +87,9 @@ class Agent1:
61
  if not questions:
62
  return [user_input]
63
 
 
 
 
64
  return questions
65
 
66
  def process(self, user_input: str) -> tuple[List[str], Dict[str, List[Dict[str, str]]]]:
 
28
 
29
  # Download necessary NLTK data
30
  nltk.download('punkt')
31
+ nltk.download('averaged_perceptron_tagger')
32
 
33
  class Agent1:
34
  def __init__(self):
35
  self.question_words = set(["what", "when", "where", "who", "whom", "which", "whose", "why", "how"])
36
  self.conjunctions = set(["and", "or"])
37
+ self.pronouns = set(["it", "its", "they", "their", "them", "he", "his", "him", "she", "her", "hers"])
38
 
39
  def is_question(self, text: str) -> bool:
40
  words = word_tokenize(text.lower())
 
42
  text.strip().endswith('?') or
43
  any(word in self.question_words for word in words))
44
 
45
def replace_pronoun(self, questions: List[str]) -> List[str]:
    """Resolve pronouns in follow-up questions using the first question.

    The last noun that the POS tagger finds in ``questions[0]`` is used as
    the antecedent. Every whole-word occurrence of a pronoun listed in
    ``self.pronouns`` in the later questions is replaced by it.

    Args:
        questions: Sub-questions in their original order.

    Returns:
        ``questions`` unchanged when there is nothing to resolve (fewer
        than two questions, no noun in the first one, or no pronouns
        configured). Otherwise a new list: the first question as-is,
        followed by the later questions with pronouns substituted. The
        input list is never modified.
    """
    if len(questions) < 2:
        return questions

    # Local import so this method does not depend on a module-level `re`.
    import re

    # Simple heuristic: any token tagged NN* in the first question is a
    # candidate antecedent; the last one wins.
    tagged = nltk.pos_tag(word_tokenize(questions[0]))
    nouns = [token for token, tag in tagged if tag.startswith('NN')]
    if not nouns or not self.pronouns:
        return questions

    antecedent = nouns[-1]
    # Possessive pronouns need a possessive noun ("its size" -> "India's
    # size"), not the bare noun. "her" is ambiguous (object vs. possessive)
    # and is deliberately left as a plain substitution.
    possessive = antecedent + ("'" if antecedent.endswith('s') else "'s")
    possessive_forms = {"its", "their", "his", "hers"}

    # Substitute inside the original string rather than tokenizing and
    # re-joining with spaces: the Treebank tokenizer splits punctuation and
    # contractions, so ' '.join(tokens) would turn "it?" into "it ?" and
    # "don't" into "do n't". The lookarounds keep hyphenated compounds such
    # as "it-based" intact, as the tokenizer-based matching did.
    alternatives = '|'.join(re.escape(p) for p in sorted(self.pronouns))
    pattern = re.compile(
        rf"(?<![\w-])(?:{alternatives})(?![\w-])", re.IGNORECASE
    )

    def substitute(match):
        # A callable replacement avoids backslash/group-reference
        # interpretation of the antecedent text.
        if match.group(0).lower() in possessive_forms:
            return possessive
        return antecedent

    resolved = [questions[0]]
    resolved.extend(pattern.sub(substitute, q) for q in questions[1:])
    return resolved
68
+
69
  def rephrase_and_split(self, user_input: str) -> List[str]:
70
  words = word_tokenize(user_input)
71
  questions = []
 
87
  if not questions:
88
  return [user_input]
89
 
90
+ # Handle pronoun replacement
91
+ questions = self.replace_pronoun(questions)
92
+
93
  return questions
94
 
95
  def process(self, user_input: str) -> tuple[List[str], Dict[str, List[Dict[str, str]]]]: