Shreyas094 committed · verified
Commit 303be9c · 1 Parent(s): 7849340

Update app.py

Files changed (1)
  1. app.py +23 -26
app.py CHANGED
@@ -35,7 +35,7 @@ class Agent1:
         self.question_words = set(["what", "when", "where", "who", "whom", "which", "whose", "why", "how"])
         self.conjunctions = set(["and", "or"])
         self.pronouns = set(["it", "its", "they", "their", "them", "he", "his", "him", "she", "her", "hers"])
-        self.context = {}  # Store multiple context elements
+        self.context = {}
 
     def is_question(self, text: str) -> bool:
         words = word_tokenize(text.lower())
@@ -58,13 +58,11 @@ class Agent1:
         if len(questions) < 2:
             return questions
 
-        # Find the subject in the first question
         subject = self.find_subject(questions[0])
 
         if not subject:
             return questions
 
-        # Replace pronouns in subsequent questions
         for i in range(1, len(questions)):
             words = word_tokenize(questions[i])
             for j, word in enumerate(words):
@@ -91,60 +89,59 @@ class Agent1:
         if self.is_question(' '.join(current_question)):
             questions.append(' '.join(current_question))
 
-        # If no questions identified, return the original input
         if not questions:
             return [user_input]
 
-        # Handle pronoun replacement
         questions = self.replace_pronoun(questions)
 
         return questions
 
     def update_context(self, query: str):
         tokens = nltk.pos_tag(word_tokenize(query))
-        important_phrases = []
+        noun_phrases = []
         current_phrase = []
 
         for word, tag in tokens:
-            if tag.startswith('NN') or tag.startswith('JJ') or tag == 'NNP':
+            if tag.startswith('NN') or tag.startswith('JJ'):
                 current_phrase.append(word)
             else:
                 if current_phrase:
-                    important_phrases.append(' '.join(current_phrase))
+                    noun_phrases.append(' '.join(current_phrase))
                     current_phrase = []
 
         if current_phrase:
-            important_phrases.append(' '.join(current_phrase))
+            noun_phrases.append(' '.join(current_phrase))
 
-        if important_phrases:
-            self.context['main_topic'] = important_phrases[0]  # Use the first important phrase as main topic
-            self.context['related_topics'] = important_phrases[1:]  # Store other phrases as related topics
+        if noun_phrases:
+            self.context['main_topic'] = noun_phrases[0]
+            self.context['related_topics'] = noun_phrases[1:]
 
     def apply_context(self, query: str) -> str:
         words = word_tokenize(query.lower())
 
-        # Check if the query is short, contains pronouns, or doesn't contain the main topic
         if (len(words) <= 5 or
             any(word in self.pronouns for word in words) or
             (self.context.get('main_topic') and self.context['main_topic'].lower() not in query.lower())):
 
-            # Apply main topic context
-            if 'main_topic' in self.context:
-                query = f"{self.context['main_topic']} {query}"
-
-            # Apply related topics if query is very short
-            if len(words) <= 3 and 'related_topics' in self.context:
-                for topic in self.context['related_topics']:
-                    if topic.lower() not in query.lower():
-                        query += f" {topic}"
-
+            new_query_parts = []
+            main_topic_added = False
+
+            for word in words:
+                if word in self.pronouns and self.context.get('main_topic'):
+                    new_query_parts.append(self.context['main_topic'])
+                    main_topic_added = True
+                else:
+                    new_query_parts.append(word)
+
+            if not main_topic_added and self.context.get('main_topic'):
+                new_query_parts.append(f"of {self.context['main_topic']}")
+
+            query = ' '.join(new_query_parts)
+
         return query
 
     def process(self, user_input: str) -> tuple[List[str], Dict[str, List[Dict[str, str]]]]:
-        # First, update context with the new input
        self.update_context(user_input)
-
-        # Then apply context and split queries
         contextualized_input = self.apply_context(user_input)
         queries = self.rephrase_and_split(contextualized_input)
         print("Identified queries:", queries)