anamargarida committed on
Commit
b43e4bd
·
verified ·
1 Parent(s): adc4b2b

Rename app_26.py to app_27.py

Browse files
Files changed (1) hide show
  1. app_26.py → app_27.py +67 -2
app_26.py → app_27.py RENAMED
@@ -331,14 +331,79 @@ def extract_arguments(text, tokenizer, model, beam_search=True):
331
 
332
  return text
333
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
334
 
335
 
336
 
337
  # Apply the tags to the sentence tokens
338
- tagged_sentence1 = add_tags_offset_2(input_text, start_cause1, end_cause1, start_effect1, end_effect1, start_signal, end_signal)
339
- tagged_sentence2 = add_tags_offset_2(input_text, start_cause2, end_cause2, start_effect2, end_effect2, start_signal, end_signal)
340
  return tagged_sentence1, tagged_sentence2
341
 
 
342
 
343
 
344
 
 
331
 
332
  return text
333
 
334
+ import re
335
+
336
def add_tags_offset_3(text, start_cause, end_cause, start_effect, end_effect,
                      start_signal, end_signal, offset_mapping=None):
    """
    Insert <ARG0>/<ARG1>/<SIG0> tags into the original text based on token
    offsets, ensuring correct nesting, avoiding empty tags, preventing
    duplication, and handling punctuation placement.

    Args:
        text (str): The original input text.
        start_cause (int): Start token index of the cause span.
        end_cause (int): End token index of the cause span.
        start_effect (int): Start token index of the effect span.
        end_effect (int): End token index of the effect span.
        start_signal (int, optional): Start token index of the signal span.
        end_signal (int, optional): End token index of the signal span.
        offset_mapping (list of tuple, optional): Maps token indices to
            (start_char, end_char) spans in ``text`` (e.g. a tokenizer's
            ``return_offsets_mapping`` output). The original code read this
            name as a free variable even though the docstring documented it
            as a parameter; it is now an explicit keyword argument so the
            function is self-contained. Callers that relied on an
            enclosing-scope variable must now pass it explicitly.

    Returns:
        str: The modified text with correctly positioned annotated spans.

    Raises:
        TypeError: If ``offset_mapping`` is not supplied.
    """
    if offset_mapping is None:
        # Fail fast with a clear message instead of the NameError the
        # original free-variable lookup would have produced at first use.
        raise TypeError("add_tags_offset_3() requires an offset_mapping "
                        "(token index -> character span) to place tags")

    # Collected as (start_char, end_char, open_tag, close_tag).
    spans = []

    def adjust_start(text, start):
        # Move a span start past leading punctuation/whitespace so a tag
        # never opens immediately before ',', ' ', '.', ';' or ':'.
        while start < len(text) and text[start] in {',', ' ', '.', ';', ':'}:
            start += 1
        return start

    # Only tag non-empty spans: indices present and start strictly < end.
    if start_cause is not None and end_cause is not None and start_cause < end_cause:
        start_cause_char, end_cause_char = offset_mapping[start_cause][0], offset_mapping[end_cause][1]
        spans.append((start_cause_char, end_cause_char, "<ARG0>", "</ARG0>"))

    if start_effect is not None and end_effect is not None and start_effect < end_effect:
        start_effect_char, end_effect_char = offset_mapping[start_effect][0], offset_mapping[end_effect][1]
        start_effect_char = adjust_start(text, start_effect_char)  # skip punctuation
        spans.append((start_effect_char, end_effect_char, "<ARG1>", "</ARG1>"))

    if start_signal is not None and end_signal is not None and start_signal < end_signal:
        start_signal_char, end_signal_char = offset_mapping[start_signal][0], offset_mapping[end_signal][1]
        spans.append((start_signal_char, end_signal_char, "<SIG0>", "</SIG0>"))

    # Insert right-to-left so earlier insertions don't shift later offsets.
    spans.sort(reverse=True, key=lambda x: x[0])

    modified_text = text
    inserted_positions = []

    for start, end, open_tag, close_tag in spans:
        # Account for any tag already inserted at or before this position.
        shift = sum(len(tag) for pos, tag in inserted_positions if pos <= start)
        start += shift
        end += shift

        # Guard against empty tags after adjustment.
        if start < end:
            modified_text = (modified_text[:start] + open_tag +
                             modified_text[start:end] + close_tag +
                             modified_text[end:])
            inserted_positions.append((start, open_tag))
            inserted_positions.append((end + len(open_tag), close_tag))

    return modified_text
398
 
399
 
400
 
401
  # Apply the tags to the sentence tokens
402
+ tagged_sentence1 = add_tags_offset_3(input_text, start_cause1, end_cause1, start_effect1, end_effect1, start_signal, end_signal)
403
+ tagged_sentence2 = add_tags_offset_3(input_text, start_cause2, end_cause2, start_effect2, end_effect2, start_signal, end_signal)
404
  return tagged_sentence1, tagged_sentence2
405
 
406
+
407
 
408
 
409