File size: 10,292 Bytes
39b18be
 
 
9d5b4c0
 
39b18be
9d5b4c0
39b18be
9d5b4c0
 
 
 
 
 
 
 
fe70438
9d5b4c0
 
 
88c61d3
 
fe70438
 
d346c89
9d5b4c0
 
 
 
 
 
 
 
 
24df49f
39b18be
 
 
9d5b4c0
 
 
 
 
 
 
 
 
 
 
24df49f
39b18be
 
 
9d5b4c0
 
 
 
 
 
39b18be
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
import re
from contextlib import contextmanager
from typing import Any, Optional

from .logging_utils import get_logger
from .settings_utils import get_constants

# Constants object from settings_utils; this module reads its `version` and
# `python` attributes when building the error-context box.
constants = get_constants()
# Logger from logging_utils; used here to emit UnitxtWarning messages.
logger = get_logger()


class Documentation:
    """Root URL of the documentation site and relative paths of its pages.

    ``URL`` is the site root (it ends with a slash); every other attribute is a
    path relative to that root, suitable for passing to ``additional_info``.
    """

    # Site root.
    URL = "https://www.unitxt.ai/en/latest/"

    # Metric-related pages.
    HUGGINGFACE_METRICS = "docs/adding_metric.html#adding-a-hugginface-metric"
    MULTIPLE_METRICS_OUTPUTS = (
        "docs/adding_metric.html"
        "#metric-outputs-with-multiple-metrics"
    )

    # Task and template pages.
    ADDING_TASK = "docs/adding_task.html"
    ADDING_TEMPLATE = "docs/adding_template.html"
    POST_PROCESSORS = "docs/adding_template.html#post-processors"

    # Evaluation, benchmarks, policy, catalog and settings pages.
    EVALUATION = "docs/evaluating_datasets.html"
    BENCHMARKS = "docs/benchmark.html"
    DATA_CLASSIFICATION_POLICY = "docs/data_classification_policy.html"
    CATALOG = "docs/saving_and_loading_from_catalog.html"
    SETTINGS = "docs/settings.html"


def additional_info(path: str) -> str:
    """Return a "for more information" suffix pointing at a documentation page.

    Args:
        path: Location of the page relative to ``Documentation.URL``
            (for example ``Documentation.ADDING_TASK``).

    Returns:
        The suffix text, beginning and ending with a newline, ready to be
        appended to an error or warning message.
    """
    # Documentation.URL already ends with "/", so joining with another "/"
    # used to yield ".../latest//docs/...". Strip the separators on both
    # sides so the resulting link always contains exactly one.
    base_url = Documentation.URL.rstrip("/")
    return f"\nFor more information: see {base_url}/{path.lstrip('/')} \n"


class UnitxtError(Exception):
    """Exception type for errors reported by Unitxt.

    Args:
        message (str): Description of what went wrong.
        additional_info_id (Optional[str]): Relative path of a documentation
            page (for example one of the path attributes of ``Documentation``).
            When given, the text produced by ``additional_info`` for that path
            is appended to ``message``.
    """

    def __init__(self, message: str, additional_info_id: Optional[str] = None):
        if additional_info_id is None:
            # No documentation pointer requested: keep the message untouched.
            full_message = message
        else:
            full_message = message + additional_info(additional_info_id)
        super().__init__(full_message)


class UnitxtWarning:
    """Log a warning message when instantiated.

    Creating an instance is the whole operation: ``__init__`` sends the message
    to the module ``logger`` at warning level and stores nothing on the
    instance.

    Args:
        message (str): Text of the warning.
        additional_info_id (Optional[str]): Relative path of a documentation
            page (for example one of the path attributes of ``Documentation``).
            When given, the text produced by ``additional_info`` for that path
            is appended to ``message`` before logging.
    """

    def __init__(self, message: str, additional_info_id: Optional[str] = None):
        text = (
            message
            if additional_info_id is None
            else message + additional_info(additional_info_id)
        )
        logger.warning(text)


# Heading printed on the first row of the error-context box. The leading
# character is the unicorn emoji (U+1F984); it had been stored as mis-decoded
# UTF-8 bytes.
context_block_title = "🦄 Unitxt Error Context"


def _visible_length(text: str) -> int:
    import unicodedata

    ansi_escape = re.compile(r"\x1b\[[0-9;]*[a-zA-Z]|\x1b\]8;;[^\x1b]*\x1b\\")
    clean_text = ansi_escape.sub("", text)
    width = 0
    for char in clean_text:
        if (
            unicodedata.east_asian_width(char) in ("F", "W")
            or 0x1F300 <= ord(char) <= 0x1F9FF
        ):
            width += 2
        else:
            width += 1
    return width


def _make_object_clickable(
    full_obj_name: str, display_name: Optional[str] = None
) -> str:
    import os

    if display_name is None:
        display_name = full_obj_name.split(".")[-1]
    if full_obj_name.startswith("unitxt."):
        parts = full_obj_name.split(".")
        if len(parts) >= 2:
            module_path = ".".join(parts[:2])
            doc_url = f"{Documentation.URL}{module_path}.html#{full_obj_name}"
            if (
                os.environ.get("TERM_PROGRAM") in ["iTerm.app", "vscode"]
                or os.environ.get("TERMINAL_EMULATOR") == "JetBrains-JediTerm"
            ):
                return f"\033]8;;{doc_url}\033\\{display_name}\033]8;;\033\\"
            return f"{display_name} ({doc_url})"
    return display_name


def _get_existing_context(error: Exception):
    """Extract existing context from an error if it exists."""
    if hasattr(error, "__error_context__"):
        existing = error.__error_context__
        return (
            existing["original_message"],
            existing["context_object"],
            existing["context"],
        )
    return str(error), None, {}


def _format_object_context(obj: Any) -> Optional[str]:
    """Format an object for display in error context."""
    if obj is None:
        return None
    if hasattr(obj, "__class__"):
        class_name = obj.__class__.__name__
        module_name = getattr(obj.__class__, "__module__", "")
    else:
        obj_type = type(obj)
        class_name = obj_type.__name__
        module_name = getattr(obj_type, "__module__", "")
    if module_name:
        full_name = f"{module_name}.{class_name}"
        clickable_object = _make_object_clickable(full_name, class_name)
        return f"Object: {clickable_object}"
    return f"Object: {class_name}"


def _make_clickable_link(url: str) -> str:
    """Create a clickable terminal link."""
    import os

    if (
        os.environ.get("TERM_PROGRAM") in ["iTerm.app", "vscode"]
        or os.environ.get("TERMINAL_EMULATOR") == "JetBrains-JediTerm"
    ):
        return f"\033]8;;{url}\033\\link\033]8;;\033\\"
    return url


def _format_help_context(help_docs) -> list:
    """Format help documentation into context parts."""
    parts = []
    if isinstance(help_docs, str):
        parts.append(f"Help: {_make_clickable_link(help_docs)}")
    elif isinstance(help_docs, dict):
        for label, url in help_docs.items():
            parts.append(f"Help ({label}): {_make_clickable_link(url)}")
    elif isinstance(help_docs, list):
        for item in help_docs:
            if isinstance(item, dict) and len(item) == 1:
                label, url = next(iter(item.items()))
                parts.append(f"Help ({label}): {_make_clickable_link(url)}")
            elif isinstance(item, str):
                parts.append(f"Help: {_make_clickable_link(item)}")
    return parts


def _build_context_parts(context_object: Any, context: dict) -> list:
    """Assemble the lines shown inside the error-context box.

    The order of the returned lines is:

    1. well-known keys (Python, Unitxt, Stage, Stream, Index, Instance,
       Object, Action), matched case-insensitively against ``context``; the
       first matching key of each kind is used,
    2. a description of ``context_object`` when no "object" key was matched,
    3. every remaining key of ``context`` except ``"help"``,
    4. the help lines built from ``context["help"]``, or a link to the
       documentation root when no ``"help"`` key is present.

    ``None`` values are displayed as ``unknown``; keys are shown with
    underscores replaced by spaces and title-cased.
    """

    def describe(key):
        raw = context[key]
        shown = "unknown" if raw is None else raw
        return f"{key.replace('_', ' ').title()}: {shown}"

    priority = (
        "Python",
        "Unitxt",
        "Stage",
        "Stream",
        "Index",
        "Instance",
        "Object",
        "Action",
    )
    lines = []
    consumed = set()

    for wanted in priority:
        target = wanted.lower()
        match = next((key for key in context if key.lower() == target), None)
        if match is None:
            continue
        lines.append(describe(match))
        consumed.add(match)

    # Describe the context object only if the caller did not supply an
    # explicit "object" entry.
    if all(key.lower() != "object" for key in consumed):
        described = _format_object_context(context_object)
        if described:
            lines.append(described)

    # "help" is rendered separately at the end, never as a plain key.
    consumed.add("help")
    lines.extend(describe(key) for key in context if key not in consumed)

    if "help" in context:
        lines.extend(_format_help_context(context["help"]))
    else:
        lines.append(f"Help: {_make_clickable_link(Documentation.URL)}")

    return lines


def _create_context_box(parts: list) -> str:
    """Create a formatted box containing context information."""
    if not parts:
        return ""
    max_width = (
        max(
            _visible_length(context_block_title),
            max(_visible_length(part) for part in parts),
        )
        + 4
    )
    top_line = "β”Œ" + "─" * max_width + "┐"
    bottom_line = "β””" + "─" * max_width + "β”˜"
    lines = [top_line]
    lines.append(
        f"β”‚ {context_block_title}{' ' * (max_width - _visible_length(context_block_title) - 1)}β”‚"
    )
    lines.append(f"β”‚ {'-' * (max_width - 2)} β”‚")
    for part in parts:
        padding = " " * (max_width - _visible_length(part) - 4)
        lines.append(f"β”‚  - {part}{padding}β”‚")
    lines.append(bottom_line)
    return "\n".join(lines)


def _store_context_attributes(
    error: Exception, context_object: Any, context: dict, original_message: str
):
    """Store context information in error attributes."""
    error.__error_context__ = {
        "context_object": context_object,
        "context": context,
        "original_message": original_message,
    }
    try:
        error.original_error = type(error)(original_message)
    except (TypeError, ValueError):
        error.original_error = Exception(original_message)
    error.context_object = context_object
    error.context = context


def _add_context_to_exception(
    original_error: Exception, context_object: Any = None, **context
):
    """Add context information to an exception by rewriting its message.

    Context already recorded on the exception (by an earlier call) is merged
    with the new values: a previously recorded context object is kept, and for
    duplicate keys the values passed to this call win. The Unitxt and Python
    versions from ``constants`` are always included. The exception's ``args``
    are replaced by a single string made of the rendered context box followed
    by the original message.

    Args:
        original_error: The exception to decorate (modified in place).
        context_object: The object being processed when the error occurred.
        **context: Extra key/value pairs to show in the context box.
    """
    original_message, existing_object, existing_context = _get_existing_context(
        original_error
    )
    # Compare against None rather than relying on truthiness: a recorded
    # object may be falsy (e.g. an empty container) and must still be kept,
    # and some objects raise when their truth value is evaluated, which would
    # hide the error being reported.
    final_context_object = (
        existing_object if existing_object is not None else context_object
    )
    final_context = {
        "Unitxt": constants.version,
        "Python": constants.python,
        **existing_context,
        **context,
    }
    context_parts = _build_context_parts(final_context_object, final_context)
    context_message = _create_context_box(context_parts)
    _store_context_attributes(
        original_error, final_context_object, final_context, original_message
    )
    if context_parts:
        formatted_message = f"\n{context_message}\n\n{original_message}"
        original_error.args = (formatted_message,)
    else:
        original_error.args = (original_message,)


@contextmanager
def error_context(context_object: Any = None, **context):
    """Decorate exceptions raised inside the ``with`` body with context details.

    Any ``Exception`` raised in the body is passed to
    ``_add_context_to_exception`` together with the arguments given here and is
    then re-raised; the exception object is the same one, only annotated. When
    the body completes normally the manager does nothing.

    Args:
        context_object: Optional object that was being processed.
        **context: Arbitrary key/value pairs describing where the error
            happened.

            The ``help`` key is special: it supplies documentation links and
            may be a single URL string, a ``{label: url}`` dict, or a list of
            URL strings and/or dicts.

    Examples:
        with error_context(self, operation="validation", item_id=42):
            result = process_item(item)

        with error_context(operation="schema_validation", help="https://docs.example.com/schema"):
            validate_schema(data)

        with error_context(processor, step="preprocessing", batch_size=32):
            results = process_batch(batch)
    """
    try:
        yield
    except Exception as raised:
        _add_context_to_exception(raised, context_object, **context)
        raise