Spaces:

unitxt
/

metric

Running

App Files Files Community

Elron committed on May 4

Commit

66630b0

verified ·

1 Parent(s): 43c8216

Upload folder using huggingface_hub

Browse files

Files changed (13) hide show

collections_operators.py +5 -1
dataset.py +1 -0
inference.py +21 -1
metric.py +1 -0
metrics.py +73 -0
operators.py +1 -1
schema.py +3 -0
serializers.py +31 -0
templates.py +4 -0
tool_calling.py +119 -0
type_utils.py +5 -0
types.py +17 -1
version.py +1 -1

collections_operators.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Any, Generator, List, Optional
 from .dict_utils import dict_get, dict_set
 from .operators import FieldOperator, StreamOperator
@@ -12,6 +12,10 @@ class Dictify(FieldOperator):
     def process_value(self, tup: Any) -> Any:
         return dict(zip(self.with_keys, tup))
 class Wrap(FieldOperator):
     inside: str

+from typing import Any, Dict, Generator, List, Optional
 from .dict_utils import dict_get, dict_set
 from .operators import FieldOperator, StreamOperator
     def process_value(self, tup: Any) -> Any:
         return dict(zip(self.with_keys, tup))
class DictToTuplesList(FieldOperator):
    """Field operator that turns a dictionary into a list of ``(key, value)`` tuples."""

    def process_value(self, dic: Dict) -> Any:
        """Return the items of ``dic`` as a list of 2-tuples, in the dict's iteration order."""
        return [(key, value) for key, value in dic.items()]
 class Wrap(FieldOperator):
     inside: str

dataset.py CHANGED Viewed

@@ -68,6 +68,7 @@ from .system_prompts import __file__ as _
 from .task import __file__ as _
 from .templates import __file__ as _
 from .text_utils import __file__ as _
 from .type_utils import __file__ as _
 from .types import __file__ as _
 from .utils import __file__ as _

 from .task import __file__ as _
 from .templates import __file__ as _
 from .text_utils import __file__ as _
+from .tool_calling import __file__ as _
 from .type_utils import __file__ as _
 from .types import __file__ as _
 from .utils import __file__ as _

inference.py CHANGED Viewed

@@ -342,6 +342,14 @@ class InferenceEngine(Artifact):
             }
         ]
 class LogProbInferenceEngine(abc.ABC, Artifact):
     """Abstract base class for inference with log probs."""
@@ -3164,12 +3172,14 @@ class LiteLLMInferenceEngine(
             # Introduce a slight delay to prevent burstiness
             await asyncio.sleep(0.01)
             messages = self.to_messages(instance)
             kwargs = self.to_dict([StandardAPIParamsMixin])
             kwargs = {k: v for k, v in kwargs.items() if v is not None}
             del kwargs["credentials"]
             try:
                 response = await self._completion(
                     messages=messages,
                     max_retries=self.max_retries,
                     drop_params=False,
                     **self.credentials,
@@ -3181,8 +3191,17 @@ class LiteLLMInferenceEngine(
                 ) from e
             usage = response.get("usage", {})
             return TextGenerationInferenceOutput(
-                prediction=response["choices"][0]["message"]["content"],
                 input_tokens=usage.get("prompt_tokens"),
                 output_tokens=usage.get("completion_tokens"),
                 model_name=response.get("model", self.model),
@@ -3267,6 +3286,7 @@ class CrossProviderInferenceEngine(InferenceEngine, StandardAPIParamsMixin):
         "watsonx-sdk": {  # checked from ibm_watsonx_ai.APIClient().foundation_models.ChatModels
             "granite-20b-code-instruct": "ibm/granite-20b-code-instruct",
             "granite-3-2-8b-instruct": "ibm/granite-3-2-8b-instruct",
             "granite-3-2b-instruct": "ibm/granite-3-2b-instruct",
             "granite-3-8b-instruct": "ibm/granite-3-8b-instruct",
             "granite-34b-code-instruct": "ibm/granite-34b-code-instruct",

             }
         ]
+    def to_tools(self, instance):
+        task_data = instance.get("task_data")
+        if isinstance(task_data, str):
+            task_data = json.loads(task_data)
+        if "__tools__" in task_data:
+            return task_data["__tools__"]
+        return None
 class LogProbInferenceEngine(abc.ABC, Artifact):
     """Abstract base class for inference with log probs."""
             # Introduce a slight delay to prevent burstiness
             await asyncio.sleep(0.01)
             messages = self.to_messages(instance)
+            tools = self.to_tools(instance)
             kwargs = self.to_dict([StandardAPIParamsMixin])
             kwargs = {k: v for k, v in kwargs.items() if v is not None}
             del kwargs["credentials"]
             try:
                 response = await self._completion(
                     messages=messages,
+                    tools=tools,
                     max_retries=self.max_retries,
                     drop_params=False,
                     **self.credentials,
                 ) from e
             usage = response.get("usage", {})
+            if tools is None:
+                prediction = response["choices"][0]["message"]["content"]
+            else:
+                try:
+                    func_call = response["choices"][0]["message"]["tool_calls"][0]["function"]
+                    prediction = f'{{"name": "{func_call.name}", "arguments": {func_call.arguments}}}'
+                except:
+                    prediction = response["choices"][0]["message"]["content"] or ""
             return TextGenerationInferenceOutput(
+                prediction=prediction,
                 input_tokens=usage.get("prompt_tokens"),
                 output_tokens=usage.get("completion_tokens"),
                 model_name=response.get("model", self.model),
         "watsonx-sdk": {  # checked from ibm_watsonx_ai.APIClient().foundation_models.ChatModels
             "granite-20b-code-instruct": "ibm/granite-20b-code-instruct",
             "granite-3-2-8b-instruct": "ibm/granite-3-2-8b-instruct",
+            "granite-3-3-8b-instruct": "ibm/granite-3-3-8b-instruct",
             "granite-3-2b-instruct": "ibm/granite-3-2b-instruct",
             "granite-3-8b-instruct": "ibm/granite-3-8b-instruct",
             "granite-34b-code-instruct": "ibm/granite-34b-code-instruct",

metric.py CHANGED Viewed

@@ -65,6 +65,7 @@ from .system_prompts import __file__ as _
 from .task import __file__ as _
 from .templates import __file__ as _
 from .text_utils import __file__ as _
 from .type_utils import __file__ as _
 from .types import __file__ as _
 from .utils import __file__ as _

 from .task import __file__ as _
 from .templates import __file__ as _
 from .text_utils import __file__ as _
+from .tool_calling import __file__ as _
 from .type_utils import __file__ as _
 from .types import __file__ as _
 from .utils import __file__ as _

metrics.py CHANGED Viewed

@@ -63,7 +63,9 @@ from .operators import ArtifactFetcherMixin, Copy, Set
 from .random_utils import get_seed
 from .settings_utils import get_settings
 from .stream import MultiStream, Stream
 from .type_utils import Type, isoftype, parse_type_string, to_type_string
 from .utils import deep_copy, recursive_copy, retry_connection_with_exponential_backoff
 logger = get_logger()
@@ -786,6 +788,77 @@ class F1Fast(MapReduceMetric[str, Tuple[int, int]]):
         return result
 class MetricWithConfidenceInterval(Metric):
     # The number of resamples used to estimate the confidence intervals of this metric.

 from .random_utils import get_seed
 from .settings_utils import get_settings
 from .stream import MultiStream, Stream
+from .tool_calling import convert_chat_api_format_to_tool
 from .type_utils import Type, isoftype, parse_type_string, to_type_string
+from .types import ToolCall
 from .utils import deep_copy, recursive_copy, retry_connection_with_exponential_backoff
 logger = get_logger()
         return result
class ToolCallingMetric(ReductionInstanceMetric[str, Dict[str, float]]):
    """Score a predicted tool call against one or more reference tool calls.

    ``map`` returns five per-instance scores, each between 0.0 and 1.0:

    * ``exact_match`` (main score): 1.0 when the prediction equals a reference.
    * ``tool_choice``: 1.0 when the predicted tool name is the name of a reference.
    * ``parameter_choice``: best fraction, over the references, of predicted
      argument names that the reference also uses.
    * ``parameter_values``: best fraction, over the references, of predicted
      arguments whose value agrees with the reference value of the same name.
    * ``parameters_types``: fraction of predicted arguments whose value has the
      type declared by the called tool in ``task_data["__tools__"]``.

    A prediction without arguments scores 1.0 on ``parameters_types``, and on
    ``parameter_choice`` / ``parameter_values`` whenever a reference exists.
    """

    main_score = "exact_match"
    reduction = MeanReduction()
    prediction_type = ToolCall

    def map(
        self, prediction: ToolCall, references: List[ToolCall], task_data: Dict[str, Any]
    ) -> Dict[str, float]:
        """Compute all tool-calling scores for a single instance.

        Args:
            prediction: The predicted tool call (``name`` and ``arguments``).
            references: The acceptable tool calls.
            task_data: Instance task data; the tools offered to the model are
                read from its ``"__tools__"`` entry when present.

        Returns:
            A dictionary mapping each score name to its value.
        """
        predicted_args = prediction["arguments"]

        # Dict equality ignores key order, which comparing ``str()`` renderings
        # does not.
        exact_match = float(any(prediction == reference for reference in references))
        tool_choice = float(
            str(prediction["name"]) in [str(reference["name"]) for reference in references]
        )
        parameter_choice = max(
            (
                self._shared_names_ratio(predicted_args, reference["arguments"])
                for reference in references
            ),
            default=0.0,
        )
        parameter_values = max(
            (
                self._agreeing_values_ratio(predicted_args, reference["arguments"])
                for reference in references
            ),
            default=0.0,
        )
        parameters_types = self._well_typed_ratio(
            prediction, task_data.get("__tools__") or []
        )

        return {
            self.main_score: exact_match,
            "tool_choice": tool_choice,
            "parameter_choice": parameter_choice,
            "parameters_types": parameters_types,
            "parameter_values": parameter_values,
        }

    def _shared_names_ratio(self, predicted_args, reference_args) -> float:
        """Return the fraction of predicted argument names also present in the reference."""
        predicted_names = set(predicted_args)
        if not predicted_names:
            return 1.0
        return len(predicted_names & set(reference_args)) / len(predicted_names)

    def _agreeing_values_ratio(self, predicted_args, reference_args) -> float:
        """Return the fraction of predicted arguments whose value agrees with the reference."""
        if not predicted_args:
            return 1.0
        matches = sum(
            1
            for key, value in predicted_args.items()
            if key in reference_args and self._values_agree(value, reference_args[key])
        )
        return matches / len(predicted_args)

    def _values_agree(self, predicted, expected) -> bool:
        """Return True when two argument values are equal or one contains the other."""
        # Equality is checked first: containment alone raises TypeError for
        # scalars such as ints, which would score equal values as mismatches.
        if predicted == expected:
            return True
        try:
            return predicted in expected or expected in predicted
        except TypeError:
            return False

    def _well_typed_ratio(self, prediction, tools) -> float:
        """Return the fraction of predicted arguments matching the called tool's declared types."""
        predicted_args = prediction["arguments"]
        if not predicted_args:
            return 1.0

        # Only the tool that was actually called is relevant. When no offered
        # tool has the predicted name, every argument is checked against ``Any``.
        declared_types = {}
        for tool in tools:
            tool = convert_chat_api_format_to_tool(tool)
            if tool["name"] == prediction["name"]:
                declared_types = {
                    param["name"]: param["type"] for param in tool["parameters"]
                }
                break

        well_typed = sum(
            1
            for key, value in predicted_args.items()
            if isoftype(value, declared_types.get(key, Any))
        )
        return well_typed / len(predicted_args)
 class MetricWithConfidenceInterval(Metric):
     # The number of resamples used to estimate the confidence intervals of this metric.

operators.py CHANGED Viewed

@@ -930,7 +930,7 @@ class Cast(FieldOperator):
     failure_default: Optional[Any] = "__UNDEFINED__"
     def prepare(self):
-        self.types = {"int": int, "float": float, "str": str, "bool": bool}
     def process_value(self, value):
         try:

     failure_default: Optional[Any] = "__UNDEFINED__"
     def prepare(self):
+        self.types = {"int": int, "float": float, "str": str, "bool": bool, "tuple": tuple}
     def process_value(self, value):
         try:

schema.py CHANGED Viewed

@@ -141,6 +141,9 @@ class FinalizeDataset(InstanceOperatorValidator):
         }
         if use_reference_fields:
             task_data = {**task_data, **instance["reference_fields"]}
         return task_data
     def serialize_instance_fields(self, instance, task_data):

         }
         if use_reference_fields:
             task_data = {**task_data, **instance["reference_fields"]}
+        if "__tools__" in instance:
+            task_data["__tools__"] = instance["__tools__"]
         return task_data
     def serialize_instance_fields(self, instance, task_data):

serializers.py CHANGED Viewed

@@ -7,6 +7,7 @@ from typing import Any, Dict, List, Union
 from .dataclass import AbstractField, Field
 from .operators import InstanceFieldOperator
 from .settings_utils import get_constants
 from .type_utils import isoftype, to_type_string
 from .types import (
     Dialog,
@@ -16,6 +17,8 @@ from .types import (
     Number,
     SQLDatabase,
     Table,
     Video,
 )
@@ -161,15 +164,43 @@ class MultiDocumentSerializer(DocumentSerializer):
         return "\n\n".join(documents)
 class MultiTypeSerializer(Serializer):
     serializers: List[SingleTypeSerializer] = Field(
         default_factory=lambda: [
             DocumentSerializer(),
             DialogSerializer(),
             MultiDocumentSerializer(),
             ImageSerializer(),
             VideoSerializer(),
             TableSerializer(),
             DialogSerializer(),
         ]
     )

 from .dataclass import AbstractField, Field
 from .operators import InstanceFieldOperator
 from .settings_utils import get_constants
+from .tool_calling import convert_to_chat_api_format
 from .type_utils import isoftype, to_type_string
 from .types import (
     Dialog,
     Number,
     SQLDatabase,
     Table,
+    Tool,
+    ToolCall,
     Video,
 )
         return "\n\n".join(documents)
class ToolsSerializer(SingleTypeSerializer):
    """Serializer for a list of ``Tool`` definitions.

    Each tool is converted to the Chat API format and collected under
    ``instance["__tools__"]``; the returned text is the JSON rendering of that
    collected list.
    """

    serialized_type = List[Tool]
    _requirements_list: List[str] = ["pydantic"]

    def serialize(self, value: List[Tool], instance: Dict[str, Any]) -> str:
        """Register the tools on the instance and return them as indented JSON.

        Args:
            value: The tools to serialize.
            instance: The instance being processed; its ``"__tools__"`` list is
                created if missing and extended with the converted tools.

        Returns:
            ``instance["__tools__"]`` (including tools added by earlier calls on
            the same instance) dumped as JSON with an indent of 4.
        """
        collected_tools = instance.setdefault("__tools__", [])
        for tool in value:
            chat_api_tool = convert_to_chat_api_format(tool=tool)
            collected_tools.append(chat_api_tool)
            # NOTE(review): this overwrites the caller's tool in place with the
            # generated JSON schema; presumably intentional so that later steps
            # see a JSON-friendly structure -- confirm before changing.
            tool["parameters"] = chat_api_tool["function"]["parameters"]
        return json.dumps(collected_tools, indent=4)
class ToolCallSerializer(SingleTypeSerializer):
    """Serializer that renders a single ``ToolCall`` as a JSON string."""

    serialized_type = ToolCall
    _requirements_list: List[str] = ["pydantic"]

    def serialize(self, value: ToolCall, instance: Dict[str, Any]) -> str:
        """Return ``value`` dumped as JSON; ``instance`` is not used."""
        encoded_call = json.dumps(value)
        return encoded_call
 class MultiTypeSerializer(Serializer):
     serializers: List[SingleTypeSerializer] = Field(
         default_factory=lambda: [
             DocumentSerializer(),
+            ToolCallSerializer(),
             DialogSerializer(),
             MultiDocumentSerializer(),
             ImageSerializer(),
             VideoSerializer(),
             TableSerializer(),
+            ToolsSerializer(),
             DialogSerializer(),
         ]
     )

templates.py CHANGED Viewed

@@ -19,6 +19,8 @@ from .serializers import (
     Serializer,
     SQLDatabaseAsSchemaSerializer,
     TableSerializer,
     VideoSerializer,
 )
 from .settings_utils import get_constants
@@ -63,6 +65,8 @@ class Template(InstanceOperator):
                 ImageSerializer(),
                 VideoSerializer(),
                 TableSerializer(),
                 DialogSerializer(),
                 ListSerializer(),
                 SQLDatabaseAsSchemaSerializer(),

     Serializer,
     SQLDatabaseAsSchemaSerializer,
     TableSerializer,
+    ToolCallSerializer,
+    ToolsSerializer,
     VideoSerializer,
 )
 from .settings_utils import get_constants
                 ImageSerializer(),
                 VideoSerializer(),
                 TableSerializer(),
+                ToolCallSerializer(),
+                ToolsSerializer(),
                 DialogSerializer(),
                 ListSerializer(),
                 SQLDatabaseAsSchemaSerializer(),

tool_calling.py ADDED Viewed

	@@ -0,0 +1,119 @@

+from typing import Any, Dict, List, Type
+from .operators import FieldOperator
+from .types import Parameter, Tool
def convert_to_chat_api_format(tool: Tool) -> Dict[str, Any]:
    """Convert a ``Tool`` into a Chat API ``function`` tool description.

    A pydantic model with one required field per tool parameter is created on
    the fly, and its JSON schema becomes the ``parameters`` entry of the result.

    Args:
        tool: The tool to convert; its ``name``, ``description`` and
            ``parameters`` entries are read.

    Returns:
        A dictionary of the form
        ``{"type": "function", "function": {"name", "description", "parameters"}}``.
    """
    # Imported lazily so that pydantic is only needed when tools are converted.
    from pydantic import create_model

    field_definitions = {}
    for param in tool["parameters"]:
        param_type = param.get("type")
        # ``Parameter.type`` is Optional: a missing or None type means the
        # parameter is untyped, not that it only accepts None.
        if param_type is None:
            param_type = Any
        # ``...`` marks the field as required in pydantic.
        field_definitions[param["name"]] = (param_type, ...)

    model = create_model(f"{tool['name']}Params", **field_definitions)

    return {
        "type": "function",
        "function": {
            "name": tool["name"],
            "description": tool["description"],
            "parameters": model.model_json_schema(),
        },
    }
def convert_chat_api_format_to_tool(chat_api_tool: Dict[str, Any]) -> Tool:
    """Convert a Chat API formatted tool back to the ``Tool`` structure.

    The name, description and parameter schema are read from the nested
    ``"function"`` entry when present, and from the top level of
    ``chat_api_tool`` otherwise.

    Args:
        chat_api_tool: A dictionary representing a tool in Chat API format.

    Returns:
        A ``Tool`` dictionary with ``name``, ``description`` and ``parameters``;
        ``parameters`` is empty when the tool declares no parameter schema.
    """
    function_info = chat_api_tool.get("function", {})
    name = function_info.get("name", chat_api_tool.get("name", ""))
    description = function_info.get(
        "description", chat_api_tool.get("description", "")
    )

    # Fall back to an empty schema (not an empty string, which has no ``get``)
    # when no parameters are declared or they are explicitly null.
    schema = function_info.get("parameters", chat_api_tool.get("parameters")) or {}
    properties = schema.get("properties", {})

    # Each JSON-schema property becomes one parameter with a Python type.
    parameters: List[Parameter] = [
        {"name": param_name, "type": json_schema_to_python_type(param_schema)}
        for param_name, param_schema in properties.items()
    ]

    tool: Tool = {
        "name": name,
        "description": description,
        "parameters": parameters,
    }
    return tool
def json_schema_to_python_type(schema: Dict[str, Any]) -> Type:
    """Convert a JSON schema fragment to the closest Python type.

    Args:
        schema: A JSON schema (sub-)document describing one value.

    Returns:
        * the matching builtin for ``string``, ``integer``, ``number``,
          ``boolean`` and ``null``;
        * ``List[<item type>]`` for ``array`` (``List[Any]`` without ``items``);
        * ``Dict[str, Any]`` for ``object``;
        * a ``Union`` for ``anyOf`` / ``oneOf`` and for a list-valued ``type``
          such as ``["string", "null"]``;
        * ``Any`` for ``$ref``, non-dict schemas and anything unrecognized.
    """
    from typing import Any, Dict, List, Union

    # Boolean schemas (``true`` / ``false``) and other non-dicts carry no type.
    if not isinstance(schema, dict):
        return Any

    schema_type = schema.get("type")

    # JSON schema allows ``type`` to be a list of alternatives; convert each
    # alternative with the rest of the schema intact (e.g. ``items``).
    if isinstance(schema_type, list):
        alternatives = [
            json_schema_to_python_type({**schema, "type": alternative})
            for alternative in schema_type
        ]
        return Union[tuple(alternatives)] if alternatives else Any

    simple_types = {
        "string": str,
        "integer": int,
        "number": float,
        "boolean": bool,
        "null": type(None),
    }
    if isinstance(schema_type, str) and schema_type in simple_types:
        return simple_types[schema_type]

    if schema_type == "array":
        items = schema.get("items", {})
        if not items:
            return List[Any]
        item_type = json_schema_to_python_type(items)
        return List[item_type]

    if schema_type == "object":
        return Dict[str, Any]

    if "anyOf" in schema or "oneOf" in schema:
        union_schemas = schema.get("anyOf", []) or schema.get("oneOf", [])
        union_types = [json_schema_to_python_type(member) for member in union_schemas]
        return Union[tuple(union_types)] if union_types else Any

    # References are not resolved; "$ref" and any unrecognized schema map to Any.
    return Any
class ToTool(FieldOperator):
    """Field operator that rewrites a Chat API tool description as a ``Tool``."""

    def process_value(self, value: Dict[str, Any]) -> Tool:
        """Return the result of ``convert_chat_api_format_to_tool`` applied to ``value``."""
        converted_tool = convert_chat_api_format_to_tool(value)
        return converted_tool

type_utils.py CHANGED Viewed

@@ -69,6 +69,8 @@ def is_typed_dict(object):
 def is_type(object):
     """Checks if the provided object is a type, including generics, Literal, TypedDict, and NewType."""
     return (
         isinstance(object, (type, *_generics_types))
         or is_new_type(object)
@@ -487,6 +489,9 @@ def isoftype(object, typing_type):
     if not is_type(typing_type):
         raise UnsupportedTypeError(typing_type)
     if is_new_type(typing_type):
         typing_type = typing_type.__supertype__

 def is_type(object):
     """Checks if the provided object is a type, including generics, Literal, TypedDict, and NewType."""
+    if object is typing.Type:
+        return True
     return (
         isinstance(object, (type, *_generics_types))
         or is_new_type(object)
     if not is_type(typing_type):
         raise UnsupportedTypeError(typing_type)
+    if typing_type is typing.Type:
+        return is_type(object)
     if is_new_type(typing_type):
         typing_type = typing_type.__supertype__

types.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Any, Dict, List, Literal, NewType, Optional, TypedDict, Union
 from .type_utils import register_type
@@ -51,6 +51,18 @@ class SQLDatabase(TypedDict):
     dbms: Optional[str]
     data: Optional[Dict[str, Dict]]
 register_type(Text)
 register_type(Number)
@@ -64,3 +76,7 @@ register_type(Document)
 register_type(MultiDocument)
 register_type(RagResponse)
 register_type(SQLDatabase)

+from typing import Any, Dict, List, Literal, NewType, Optional, Type, TypedDict, Union
 from .type_utils import register_type
     dbms: Optional[str]
     data: Optional[Dict[str, Dict]]
class Parameter(TypedDict):
    """One named parameter of a tool."""

    name: str
    # Holds an actual Python type object rather than a type name.
    type: Optional[Type]


class Tool(TypedDict):
    """A callable tool: its name, a description and its parameters."""

    name: str
    description: str
    parameters: List[Parameter]


class ToolCall(TypedDict):
    """An invocation of a tool: the tool name and its arguments keyed by parameter name."""

    name: str
    arguments: Dict[str, Any]
 register_type(Text)
 register_type(Number)
 register_type(MultiDocument)
 register_type(RagResponse)
 register_type(SQLDatabase)
+register_type(Parameter)
+register_type(Tool)
+register_type(ToolCall)

version.py CHANGED Viewed

	@@ -1 +1 @@
1	- version = "1.22.3"


1	+ version = "1.22.4"