Spaces:
Sleeping
Sleeping
Rename app_26.py to app_27.py
Browse files — app_26.py → app_27.py (+67 −2)
app_26.py → app_27.py
RENAMED
@@ -331,14 +331,79 @@ def extract_arguments(text, tokenizer, model, beam_search=True):
|
|
331 |
|
332 |
return text
|
333 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
334 |
|
335 |
|
336 |
|
337 |
# Apply the tags to the sentence tokens
|
338 |
-
tagged_sentence1 =
|
339 |
-
tagged_sentence2 =
|
340 |
return tagged_sentence1, tagged_sentence2
|
341 |
|
|
|
342 |
|
343 |
|
344 |
|
|
|
331 |
|
332 |
return text
|
333 |
|
334 |
+
import re
|
335 |
+
|
336 |
+
def add_tags_offset_3(text, start_cause, end_cause, start_effect, end_effect, start_signal, end_signal, offset_mapping=None):
    """
    Insert <ARG0>/<ARG1>/<SIG0> tags into the original text based on token
    offsets, ensuring correct nesting, avoiding empty tags, preventing
    duplication, and handling punctuation placement.

    Args:
        text (str): The original input text.
        start_cause (int, optional): Start token index of the cause span.
        end_cause (int, optional): End token index of the cause span.
        start_effect (int, optional): Start token index of the effect span.
        end_effect (int, optional): End token index of the effect span.
        start_signal (int, optional): Start token index of the signal span.
        end_signal (int, optional): End token index of the signal span.
        offset_mapping (list of (int, int), optional): Maps token indices to
            character spans, e.g. a tokenizer's ``return_offsets_mapping``
            output. BUGFIX: this was documented and used by the body but
            missing from the signature (NameError at runtime). Added as a
            trailing keyword parameter so existing positional callers are
            unaffected syntactically.

    Returns:
        str: The modified text with correctly positioned annotated spans.

    Raises:
        ValueError: If any non-empty span is requested while offset_mapping
            is None.
    """

    # Function to adjust a start position so the opening tag is not inserted
    # in front of punctuation/whitespace (e.g. a leading comma).
    def adjust_start(text, start):
        while start < len(text) and text[start] in {',', ' ', '.', ';', ':'}:
            start += 1  # Move past punctuation
        return start

    # A span is usable only when both token indices are present and the span
    # is non-empty (start < end); this also prevents emitting empty tags.
    requested = (
        (start_cause, end_cause),
        (start_effect, end_effect),
        (start_signal, end_signal),
    )
    any_span = any(s is not None and e is not None and s < e for s, e in requested)
    if any_span and offset_mapping is None:
        raise ValueError("offset_mapping is required when any tag span is given")

    # Convert token indices to character indices
    spans = []  # (char_start, char_end, open_tag, close_tag)

    if start_cause is not None and end_cause is not None and start_cause < end_cause:
        start_cause_char, end_cause_char = offset_mapping[start_cause][0], offset_mapping[end_cause][1]
        spans.append((start_cause_char, end_cause_char, "<ARG0>", "</ARG0>"))

    if start_effect is not None and end_effect is not None and start_effect < end_effect:
        start_effect_char, end_effect_char = offset_mapping[start_effect][0], offset_mapping[end_effect][1]
        start_effect_char = adjust_start(text, start_effect_char)  # Skip punctuation
        spans.append((start_effect_char, end_effect_char, "<ARG1>", "</ARG1>"))

    if start_signal is not None and end_signal is not None and start_signal < end_signal:
        start_signal_char, end_signal_char = offset_mapping[start_signal][0], offset_mapping[end_signal][1]
        spans.append((start_signal_char, end_signal_char, "<SIG0>", "</SIG0>"))

    # Sort spans in reverse order based on start index so that inserting a
    # later (right-most) tag never shifts the positions of earlier spans.
    spans.sort(reverse=True, key=lambda x: x[0])

    # Insert tags, tracking prior insertions so overlapping spans are shifted
    # by the length of any tag already inserted at or before their start.
    modified_text = text
    inserted_positions = []  # (insertion position, tag text)

    for start, end, open_tag, close_tag in spans:
        # Adjust positions based on previous insertions
        shift = sum(len(tag) for pos, tag in inserted_positions if pos <= start)
        start += shift
        end += shift

        # Ensure valid start/end to prevent empty tags
        if start < end:
            modified_text = modified_text[:start] + open_tag + modified_text[start:end] + close_tag + modified_text[end:]
            inserted_positions.append((start, open_tag))
            inserted_positions.append((end + len(open_tag), close_tag))

    return modified_text
398 |
|
399 |
|
400 |
|
401 |
# Apply the tags to the sentence tokens
|
402 |
+
tagged_sentence1 = add_tags_offset_3(input_text, start_cause1, end_cause1, start_effect1, end_effect1, start_signal, end_signal)
|
403 |
+
tagged_sentence2 = add_tags_offset_3(input_text, start_cause2, end_cause2, start_effect2, end_effect2, start_signal, end_signal)
|
404 |
return tagged_sentence1, tagged_sentence2
|
405 |
|
406 |
+
|
407 |
|
408 |
|
409 |
|