Hugging Face Space — file diff view
Commit: "Update app.py" (Browse files)
Space status: Sleeping
File: app.py — CHANGED
@@ -35,7 +35,7 @@ class Agent1:
|
|
35 |
self.question_words = set(["what", "when", "where", "who", "whom", "which", "whose", "why", "how"])
|
36 |
self.conjunctions = set(["and", "or"])
|
37 |
self.pronouns = set(["it", "its", "they", "their", "them", "he", "his", "him", "she", "her", "hers"])
|
38 |
-
self.context = {}
|
39 |
|
40 |
def is_question(self, text: str) -> bool:
|
41 |
words = word_tokenize(text.lower())
|
@@ -58,13 +58,11 @@ class Agent1:
|
|
58 |
if len(questions) < 2:
|
59 |
return questions
|
60 |
|
61 |
-
# Find the subject in the first question
|
62 |
subject = self.find_subject(questions[0])
|
63 |
|
64 |
if not subject:
|
65 |
return questions
|
66 |
|
67 |
-
# Replace pronouns in subsequent questions
|
68 |
for i in range(1, len(questions)):
|
69 |
words = word_tokenize(questions[i])
|
70 |
for j, word in enumerate(words):
|
@@ -91,60 +89,59 @@ class Agent1:
|
|
91 |
if self.is_question(' '.join(current_question)):
|
92 |
questions.append(' '.join(current_question))
|
93 |
|
94 |
-
# If no questions identified, return the original input
|
95 |
if not questions:
|
96 |
return [user_input]
|
97 |
|
98 |
-
# Handle pronoun replacement
|
99 |
questions = self.replace_pronoun(questions)
|
100 |
|
101 |
return questions
|
102 |
|
103 |
def update_context(self, query: str):
|
104 |
tokens = nltk.pos_tag(word_tokenize(query))
|
105 |
-
|
106 |
current_phrase = []
|
107 |
|
108 |
for word, tag in tokens:
|
109 |
-
if tag.startswith('NN') or tag.startswith('JJ')
|
110 |
current_phrase.append(word)
|
111 |
else:
|
112 |
if current_phrase:
|
113 |
-
|
114 |
current_phrase = []
|
115 |
|
116 |
if current_phrase:
|
117 |
-
|
118 |
|
119 |
-
if
|
120 |
-
self.context['main_topic'] =
|
121 |
-
self.context['related_topics'] =
|
122 |
|
123 |
def apply_context(self, query: str) -> str:
|
124 |
words = word_tokenize(query.lower())
|
125 |
|
126 |
-
# Check if the query is short, contains pronouns, or doesn't contain the main topic
|
127 |
if (len(words) <= 5 or
|
128 |
any(word in self.pronouns for word in words) or
|
129 |
(self.context.get('main_topic') and self.context['main_topic'].lower() not in query.lower())):
|
130 |
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
|
|
|
|
|
|
|
|
|
|
141 |
return query
|
142 |
|
143 |
def process(self, user_input: str) -> tuple[List[str], Dict[str, List[Dict[str, str]]]]:
|
144 |
-
# First, update context with the new input
|
145 |
self.update_context(user_input)
|
146 |
-
|
147 |
-
# Then apply context and split queries
|
148 |
contextualized_input = self.apply_context(user_input)
|
149 |
queries = self.rephrase_and_split(contextualized_input)
|
150 |
print("Identified queries:", queries)
|
|
|
35 |
self.question_words = set(["what", "when", "where", "who", "whom", "which", "whose", "why", "how"])
|
36 |
self.conjunctions = set(["and", "or"])
|
37 |
self.pronouns = set(["it", "its", "they", "their", "them", "he", "his", "him", "she", "her", "hers"])
|
38 |
+
self.context = {}
|
39 |
|
40 |
def is_question(self, text: str) -> bool:
|
41 |
words = word_tokenize(text.lower())
|
|
|
58 |
if len(questions) < 2:
|
59 |
return questions
|
60 |
|
|
|
61 |
subject = self.find_subject(questions[0])
|
62 |
|
63 |
if not subject:
|
64 |
return questions
|
65 |
|
|
|
66 |
for i in range(1, len(questions)):
|
67 |
words = word_tokenize(questions[i])
|
68 |
for j, word in enumerate(words):
|
|
|
89 |
if self.is_question(' '.join(current_question)):
|
90 |
questions.append(' '.join(current_question))
|
91 |
|
|
|
92 |
if not questions:
|
93 |
return [user_input]
|
94 |
|
|
|
95 |
questions = self.replace_pronoun(questions)
|
96 |
|
97 |
return questions
|
98 |
|
99 |
def update_context(self, query: str):
|
100 |
tokens = nltk.pos_tag(word_tokenize(query))
|
101 |
+
noun_phrases = []
|
102 |
current_phrase = []
|
103 |
|
104 |
for word, tag in tokens:
|
105 |
+
if tag.startswith('NN') or tag.startswith('JJ'):
|
106 |
current_phrase.append(word)
|
107 |
else:
|
108 |
if current_phrase:
|
109 |
+
noun_phrases.append(' '.join(current_phrase))
|
110 |
current_phrase = []
|
111 |
|
112 |
if current_phrase:
|
113 |
+
noun_phrases.append(' '.join(current_phrase))
|
114 |
|
115 |
+
if noun_phrases:
|
116 |
+
self.context['main_topic'] = noun_phrases[0]
|
117 |
+
self.context['related_topics'] = noun_phrases[1:]
|
118 |
|
119 |
def apply_context(self, query: str) -> str:
|
120 |
words = word_tokenize(query.lower())
|
121 |
|
|
|
122 |
if (len(words) <= 5 or
|
123 |
any(word in self.pronouns for word in words) or
|
124 |
(self.context.get('main_topic') and self.context['main_topic'].lower() not in query.lower())):
|
125 |
|
126 |
+
new_query_parts = []
|
127 |
+
main_topic_added = False
|
128 |
+
|
129 |
+
for word in words:
|
130 |
+
if word in self.pronouns and self.context.get('main_topic'):
|
131 |
+
new_query_parts.append(self.context['main_topic'])
|
132 |
+
main_topic_added = True
|
133 |
+
else:
|
134 |
+
new_query_parts.append(word)
|
135 |
+
|
136 |
+
if not main_topic_added and self.context.get('main_topic'):
|
137 |
+
new_query_parts.append(f"of {self.context['main_topic']}")
|
138 |
+
|
139 |
+
query = ' '.join(new_query_parts)
|
140 |
+
|
141 |
return query
|
142 |
|
143 |
def process(self, user_input: str) -> tuple[List[str], Dict[str, List[Dict[str, str]]]]:
|
|
|
144 |
self.update_context(user_input)
|
|
|
|
|
145 |
contextualized_input = self.apply_context(user_input)
|
146 |
queries = self.rephrase_and_split(contextualized_input)
|
147 |
print("Identified queries:", queries)
|