Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -155,8 +155,17 @@ def predict_test(model, tokenizer, embedding_model, df, question, index): # sen
|
|
155 |
mostSimContext = mostSimContext.strip()
|
156 |
mostSimContext = re.sub(r'\s+', ' ', mostSimContext)
|
157 |
|
158 |
-
|
159 |
segments = sent_tokenize(mostSimContext, engine="crfcut")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
160 |
segments_index = set_index(get_embeddings(embedding_model,segments))
|
161 |
_distances,_indices = faiss_search(segments_index, question_vector)
|
162 |
mostSimSegment = segments[_indices[0][0]]
|
@@ -166,8 +175,10 @@ def predict_test(model, tokenizer, embedding_model, df, question, index): # sen
|
|
166 |
# Find the start and end indices of mostSimSegment within mostSimContext
|
167 |
start_index = mostSimContext.find(Answer)
|
168 |
end_index = start_index + len(Answer)
|
169 |
-
|
|
|
170 |
print(f"mostSimContext{len(mostSimContext)}=>{mostSimContext}\nsegments{len(segments)}=>{segments}\nmostSimSegment{len(mostSimSegment)}=>{mostSimSegment}")
|
|
|
171 |
_time = time.time() - t
|
172 |
output = {
|
173 |
"user_question": question,
|
|
|
155 |
mostSimContext = mostSimContext.strip()
|
156 |
mostSimContext = re.sub(r'\s+', ' ', mostSimContext)
|
157 |
|
|
|
158 |
segments = sent_tokenize(mostSimContext, engine="crfcut")
|
159 |
+
|
160 |
+
#=====add
|
161 |
+
if (len(segments)==1):
|
162 |
+
segments = ' '.join(segments)
|
163 |
+
|
164 |
+
segments = segments.split('และ')
|
165 |
+
segments = [segment.split('หรือ') for segment in segments]
|
166 |
+
segments = [sentence for segment in segments for sentence in segment]
|
167 |
+
#=====end
|
168 |
+
|
169 |
segments_index = set_index(get_embeddings(embedding_model,segments))
|
170 |
_distances,_indices = faiss_search(segments_index, question_vector)
|
171 |
mostSimSegment = segments[_indices[0][0]]
|
|
|
175 |
# Find the start and end indices of mostSimSegment within mostSimContext
|
176 |
start_index = mostSimContext.find(Answer)
|
177 |
end_index = start_index + len(Answer)
|
178 |
+
|
179 |
+
print(f"answer {len(answer)} => {answer} || startIndex =>{start_index} || endIndex =>{end_index}")
|
180 |
print(f"mostSimContext{len(mostSimContext)}=>{mostSimContext}\nsegments{len(segments)}=>{segments}\nmostSimSegment{len(mostSimSegment)}=>{mostSimSegment}")
|
181 |
+
|
182 |
_time = time.time() - t
|
183 |
output = {
|
184 |
"user_question": question,
|