Upload folder using huggingface_hub
- formats.py +5 -5
- inference.py +19 -2
- llm_as_judge_constants.py +1 -1
- llm_as_judge_from_template.py +1 -0
- loaders.py +3 -1
- metrics.py +37 -38
- operator.py +4 -0
- operators.py +1 -1
- schema.py +3 -3
- settings_utils.py +3 -0
- standard.py +2 -2
- system_prompts.py +4 -2
- task.py +8 -2
- templates.py +4 -4
- text_utils.py +72 -2
- version.py +1 -1
formats.py
CHANGED
@@ -116,7 +116,7 @@ def apply_capital_new_line_notation(text: str) -> str:
 
 
 class BaseFormat(Format):
-    demos_field: str = "demos"
+    demos_field: str = constants.demos_field
 
     @staticmethod
     def _pop_field(instance, field_name, do_pop: bool = True) -> str:
@@ -133,14 +133,14 @@ class BaseFormat(Format):
     def _prepare_instance_fields(self, instance) -> Tuple[str]:
         instance_fields = {}
 
-        for field in "source", "instruction", "system_prompt", "target_prefix":
+        for field in "source", constants.instruction_field, constants.system_prompt_field, "target_prefix":
             instance_fields[field] = self._pop_field(instance, field)
 
         instance_fields["media"] = self._pop_field(instance, "media", do_pop=False)
         if not instance_fields["media"]:
             instance_fields["media"] = {"images": [], "audios": []}
 
-        instance_fields["demos"] = []
+        instance_fields[constants.demos_field] = []
         if self.demos_field is not None and self.demos_field in instance:
             demos = instance[self.demos_field]
             assert (
@@ -150,7 +150,7 @@ class BaseFormat(Format):
             demo = {}
             for field in ["source", "target", "target_prefix"]:
                 demo[field] = self._pop_field(demo_instance, field, do_pop=False)
-            instance_fields["demos"].append(demo)
+            instance_fields[constants.demos_field].append(demo)
 
         return instance_fields
 
@@ -219,7 +219,7 @@ class SystemFormat(BaseFormat):
     .. code-block::
 
         system_format = SystemFormat(
-            demos_field="demos",
+            demos_field=constants.demos_field,
            demo_format="Input: {source}\nOutput: {target}\n\n",
            model_input_format="Instruction: {instruction}\n\n{demos}Input: {source}\nOutput: ",
         )
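The edit above swaps the hard-coded "demos" literal (and, in the field loop, "instruction" and "system_prompt") for constants shared through settings_utils.py further down. A minimal sketch of the pattern, with a stand-in Constants holder rather than the real get_constants() machinery:

from dataclasses import dataclass

@dataclass(frozen=True)
class Constants:
    # Values mirror the ones added to settings_utils.py in this commit.
    demos_field: str = "demos"
    instruction_field: str = "instruction"
    system_prompt_field: str = "system_prompt"

constants = Constants()

def prepare_instance_fields(instance: dict) -> dict:
    # Every consumer now spells the field name one way, so renaming it
    # later means touching a single definition instead of many literals.
    return {constants.demos_field: instance.get(constants.demos_field, [])}

print(prepare_instance_fields({"demos": [{"source": "2+2", "target": "4"}]}))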
inference.py
CHANGED
@@ -1937,6 +1937,9 @@ class WMLChatParamsMixin(Artifact):
     time_limit: Optional[int] = None
     top_p: Optional[float] = None
     n: Optional[int] = None
+    seed: Optional[int] = None
+    logit_bias: Optional[Dict[str, Any]] = None
+    stop: Optional[List[str]] = None
 
 
 CredentialsWML = Dict[
@@ -2486,8 +2489,20 @@ class WMLInferenceEngineChat(WMLInferenceEngineBase, WMLChatParamsMixin):
         "of messages."
     )
 
+    @staticmethod
+    def check_instance_contains_image(instance: Dict[str, Any]) -> bool:
+        if "media" not in instance:
+            return False
+        if not isinstance(instance["media"], dict):
+            return False
+        if "images" not in instance["media"]:
+            return False
+        if not instance["media"]["images"]:
+            return False
+        return True
+
     def to_messages(self, instance: Union[Dict, List]) -> List[List[Dict[str, Any]]]:
-        if isinstance(instance["source"], str) and …
+        if isinstance(instance["source"], str) and self.check_instance_contains_image(instance):
             return self._create_messages_from_instance(instance)
 
         messages = super().to_messages(instance)
@@ -2985,7 +3000,9 @@ class CrossProviderInferenceEngine(InferenceEngine, StandardAPIParamsMixin):
             mapping each supported API to a corresponding
             model identifier string. This mapping allows consistent access to models
             across different API backends.
-        provider_specific_args: …
+        provider_specific_args:
+            (Optional[Dict[str, Dict[str,str]]]) Args specific to a provider for example provider_specific_args={"watsonx": {"max_requests_per_second": 4}}
+
     """
 
     label: str = "cross_provider"
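The new check_instance_contains_image guard is self-contained and can be exercised in isolation; a small usage sketch with made-up instances (the function body is copied from the diff above):

from typing import Any, Dict

def check_instance_contains_image(instance: Dict[str, Any]) -> bool:
    # An instance counts as multimodal only when "media" exists, is a
    # dict, has an "images" key, and that list is non-empty.
    if "media" not in instance:
        return False
    if not isinstance(instance["media"], dict):
        return False
    if "images" not in instance["media"]:
        return False
    if not instance["media"]["images"]:
        return False
    return True

print(check_instance_contains_image({"source": "hi"}))                                  # False
print(check_instance_contains_image({"source": "hi", "media": {"images": []}}))         # False
print(check_instance_contains_image({"source": "hi", "media": {"images": ["img-0"]}}))  # True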
llm_as_judge_constants.py
CHANGED
@@ -205,7 +205,7 @@ class DirectCriteriaCatalogEnum(Enum):
             ),
             CriteriaOption(
                 "Pass",
-                "There is no …
+                "There is no numerical temperature reading in the response.",
             ),
         ],
         {"Yes": 1.0, "No": 0.5, "Pass": 0.0},
llm_as_judge_from_template.py
CHANGED
@@ -37,6 +37,7 @@ class LLMAsJudgeBase(BulkInstanceMetric, ArtifactFetcherMixin):
         inference_model (InferenceEngine): The module that creates the inference of the judge llm.
         reduction_map (dict): A dictionary specifying the reduction method for the metric.
         batch_size (int): The size of the bulk.
+
     """
 
     main_score: str = "llm_as_judge"
loaders.py
CHANGED
@@ -443,11 +443,13 @@ class LoadCSV(LazyLoader):
                     dataset = reader(self.files[split], **self.get_args()).to_dict(
                         "records"
                     )
+                    break
                 except ValueError:
                     import fsspec
 
                     with fsspec.open(self.files[split], mode="rt") as f:
                         dataset = reader(f, **self.get_args()).to_dict("records")
+                    break
                 except Exception as e:
                     logger.debug(f"Attempt csv load {attempt + 1} failed: {e}")
                     if attempt < settings.loaders_max_retries - 1:
@@ -601,7 +603,7 @@ class LoadFromIBMCloud(Loader):
         bucket_name='my-bucket'
     )
     multi_stream = load_ibm_cloud.process()
-    """
+    """  # pragma: allowlist secret
 
     endpoint_url_env: str
     aws_access_key_id_env: str
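The two added break statements are the substance of the LoadCSV fix: the read runs inside a retry loop, and without them a successful attempt would fall through and load again. A stripped-down sketch of the control flow, with MAX_RETRIES standing in for settings.loaders_max_retries:

import logging

logger = logging.getLogger(__name__)
MAX_RETRIES = 3  # stand-in for settings.loaders_max_retries

def load_with_retries(read):
    # Exit the loop on the first attempt that does not raise; re-raise
    # only once the retry budget is exhausted.
    for attempt in range(MAX_RETRIES):
        try:
            dataset = read()
            break
        except Exception as e:
            logger.debug(f"Attempt csv load {attempt + 1} failed: {e}")
            if attempt == MAX_RETRIES - 1:
                raise
    return dataset

print(load_with_retries(lambda: [{"a": 1}]))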
metrics.py
CHANGED
@@ -75,6 +75,7 @@ settings = get_settings()
 
 warnings.filterwarnings("ignore", category=DegenerateDataWarning)
 
+
 def hf_evaluate_load(path: str, *args, **kwargs):
     if settings.hf_offline_metrics_path is not None:
         path = os.path.join(settings.hf_offline_metrics_path, path)
@@ -83,13 +84,18 @@ def hf_evaluate_load(path: str, *args, **kwargs):
         *args,
         **kwargs,
         experiment_id=str(uuid.uuid4()),
-        …
+        download_config=DownloadConfig(
+            max_retries=settings.loaders_max_retries,
+        ),
+        verification_mode="no_checks",
+        trust_remote_code=settings.allow_unverified_code,
+        download_mode=(
+            "force_redownload"
+            if settings.disable_hf_datasets_cache
+            else "reuse_dataset_if_exists"
+        ),
+    )
+
 
 class MetricsList(ListCollection):
     def verify(self):
@@ -2311,13 +2317,11 @@ class HuggingfaceMetric(GlobalMetric):
             Documentation.HUGGINGFACE_METRICS,
         )
 
-        assert (
-            self.hf_additional_input_fields
-            or isoftype(self.hf_additional_input_fields, List[str])
+        assert self.hf_additional_input_fields is None or isoftype(
+            self.hf_additional_input_fields, List[str]
         ), f"Argument hf_additional_input_fields should be either None or List[str]. It is now: {self.hf_additional_input_fields}."
-        assert (
-            self.hf_additional_input_fields_pass_one_value
-            or isoftype(self.hf_additional_input_fields_pass_one_value, List[str])
+        assert self.hf_additional_input_fields_pass_one_value is None or isoftype(
+            self.hf_additional_input_fields_pass_one_value, List[str]
        ), f"Argument hf_additional_input_fields_pass_one_value should be either None or List[str]. It is now: {self.hf_additional_input_fields_pass_one_value}."
 
         return super().verify()
@@ -2826,9 +2830,7 @@ class F1MultiLabel(GlobalMetric, PackageRequirementsMixin):
     def prepare(self):
         super().prepare()
 
-        self._metric = hf_evaluate_load(
-            self.metric, "multilabel"
-        )
+        self._metric = hf_evaluate_load(self.metric, "multilabel")
 
     def add_str_to_id(self, str):
         if str not in self.str_to_id:
@@ -2885,8 +2887,8 @@ class F1MultiLabel(GlobalMetric, PackageRequirementsMixin):
             labels=labels_param,
         )
         if isinstance(result[self.metric], numpy.ndarray):
-            assert (
-                len(result[self.metric]) == len(labels)
+            assert len(result[self.metric]) == len(
+                labels
             ), f"F1 result ({result[self.metric]}) has more entries than labels ({labels})"
             final_result = {self.main_score: nan_mean(result[self.metric])}
             for i, label in enumerate(labels):
@@ -3625,7 +3627,9 @@ class Detector(BulkInstanceMetric):
         if settings.hf_offline_models_path is not None:
             model_path = os.path.join(settings.hf_offline_models_path, model_path)
         self.pipe = pipeline(
-            "text-classification", …
+            "text-classification",
+            model=model_path,
+            device=device,
         )
 
     def compute(
@@ -3662,7 +3666,6 @@ class RegardMetric(GlobalMetric):
             model_path = os.path.join(settings.hf_offline_models_path, model_path)
         self.regard_model = AutoModelForSequenceClassification.from_pretrained(
             model_path,
-            …
         )
         self.regard_tokenizer = AutoTokenizer.from_pretrained(model_path)
 
@@ -3865,9 +3868,9 @@ class LlamaIndexLLMMetric(InstanceMetric):
     prediction_type = str
     reduction_map: Dict[str, List[str]] = None
     openai_models: List[str] = ["gpt-3.5-turbo"]
-    anthropic_models: List[…
+    anthropic_models: List[str] = (
+        []
+    )  # this is here for the sake of documentation for future models
     mock_models: List[str] = ["mock"]
     external_api_models = openai_models + anthropic_models
     data_classification_policy = ["public"]
@@ -4123,9 +4126,7 @@ class Perplexity(BulkInstanceMetric):
         model_path = self.model_name
         if settings.hf_offline_models_path is not None:
             model_path = os.path.join(settings.hf_offline_models_path, model_path)
-        self.model = (
-            self.model_class().from_pretrained(model_path).to(self.device)
-        )
+        self.model = self.model_class().from_pretrained(model_path).to(self.device)
         self.tokenizer = AutoTokenizer.from_pretrained(model_path)
         if self.tokenizer.pad_token_id is None:
             self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
@@ -4291,7 +4292,7 @@ class FaithfulnessHHEM(BulkInstanceMetric):
     batch_size: int = 2
     model_name: str = "vectara/hallucination_evaluation_model"
     prediction_type = str
-    …
+    # single_reference_per_prediction = True
     max_context_words = 4096
     reduction_map = {"mean": [main_score]}
 
@@ -4308,6 +4309,7 @@ class FaithfulnessHHEM(BulkInstanceMetric):
         else:
             device = "cpu"
         from transformers import AutoModelForSequenceClassification
+
         model_path = self.model_name
         if settings.hf_offline_models_path is not None:
             model_path = os.path.join(settings.hf_offline_models_path, model_path)
@@ -5955,6 +5957,7 @@ class GraniteGuardianBase(InstanceMetric):
 
     def prepare(self):
         from transformers import AutoTokenizer
+
         if not isinstance(self.risk_type, RiskType):
             self.risk_type = RiskType[self.risk_type]
         if not hasattr(self, "_tokenizer") or self._tokenizer is None:
@@ -6268,18 +6271,10 @@ class SQLExecutionAccuracy(InstanceMetric):
         if df1.shape != df2.shape:
             return False
 
-        …
-        # if not return False, if all the columns worked return tue
-        for df1_col in df1.columns:
-            col_matched = False
-            for df2_col in df2.columns:
-                if all(df1[df1_col].values == df2[df2_col].values):
-                    col_matched = True
-            if not col_matched:
-                return False
+        df1_rows_sorted = [sorted(map(str, row)) for row in df1.to_numpy()]
+        df2_rows_sorted = [sorted(map(str, row)) for row in df2.to_numpy()]
 
-        return True
+        return df1_rows_sorted == df2_rows_sorted
 
     @staticmethod
     def is_subset_ignore_colnames(df1, df2):
@@ -6381,6 +6376,8 @@ class SQLExecutionAccuracy(InstanceMetric):
                 gold_error,
             )
 
+        if isinstance(gold_res, dict) and "results" in gold_res:
+            gold_res = gold_res["results"]
         gold_df = pd.DataFrame(gold_res)
         non_empty_gold_df = 0 if gold_df.empty else 1
 
@@ -6444,6 +6441,8 @@ class SQLExecutionAccuracy(InstanceMetric):
                 pred_error,
             )
 
+        if isinstance(pred_res, dict) and "results" in pred_res:
+            pred_res = pred_res["results"]
         predicted_df = pd.DataFrame(pred_res)
 
         execution_result = (
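Among the metrics.py edits, the SQLExecutionAccuracy rewrite replaces the old column-by-column scan with a row-wise comparison that ignores column names and column order. A self-contained sketch of the new logic (the free-function name is illustrative; the method itself lives on SQLExecutionAccuracy):

import pandas as pd

def rows_equivalent_ignore_colnames(df1: pd.DataFrame, df2: pd.DataFrame) -> bool:
    # Two frames match when, after stringifying and sorting each row's
    # values, the rows agree positionally; column labels play no part.
    if df1.shape != df2.shape:
        return False
    df1_rows_sorted = [sorted(map(str, row)) for row in df1.to_numpy()]
    df2_rows_sorted = [sorted(map(str, row)) for row in df2.to_numpy()]
    return df1_rows_sorted == df2_rows_sorted

a = pd.DataFrame({"id": [1, 2], "name": ["x", "y"]})
b = pd.DataFrame({"n": ["x", "y"], "k": [1, 2]})  # renamed, reordered columns
print(rows_equivalent_ignore_colnames(a, b))  # True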
operator.py
CHANGED
@@ -157,6 +157,7 @@ class StreamingOperator(Operator, PackageRequirementsMixin):
     """
 
 
+
 class SideEffectOperator(StreamingOperator):
     """Base class for operators that does not affect the stream."""
 
@@ -249,6 +250,9 @@ class SourceOperator(MultiStreamOperator):
         pass
 
 
+    def get_splits(self):
+        return list(self.process().keys())
+
 class StreamInitializerOperator(SourceOperator):
     """A class representing a stream initializer operator in the streaming system.
 
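The new get_splits helper just surfaces the split names of whatever mapping process() returns. A toy stand-in (not the real SourceOperator) showing the intent:

class ToySource:
    # process() maps split names to streams; get_splits exposes the names
    # without requiring callers to consume the streams themselves.
    def process(self):
        return {"train": iter([{"x": 1}]), "test": iter([{"x": 2}])}

    def get_splits(self):
        return list(self.process().keys())

print(ToySource().get_splits())  # ['train', 'test']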
operators.py
CHANGED
@@ -1527,7 +1527,7 @@ class IntersectCorrespondingFields(InstanceOperator):
 
         if not isinstance(self.allowed_values, list):
             raise ValueError(
-                f"The …
+                f"The allowed_values is not a type list but '{type(self.allowed_values)}'"
             )
 
     def process(
schema.py
CHANGED
@@ -151,10 +151,10 @@ class FinalizeDataset(InstanceOperatorValidator):
         )
         if "criteria" in task_data and isinstance(task_data["criteria"], Artifact):
             task_data["criteria"] = self.artifact_to_jsonable(task_data["criteria"])
-        if "demos" in instance:
-            task_data["demos"] = [
+        if constants.demos_field in instance:
+            task_data[constants.demos_field] = [
                 self._get_instance_task_data(instance)
-                for instance in instance.pop("demos")
+                for instance in instance.pop(constants.demos_field)
             ]
 
         instance = self.serialize_instance_fields(instance, task_data)
settings_utils.py
CHANGED
@@ -192,6 +192,9 @@ if Constants.is_uninitilized():
     constants.instance_stream = "__INSTANCE_STREAM__"
     constants.image_tag = "unitxt-img"
     constants.demos_pool_field = "_demos_pool_"
+    constants.demos_field = "demos"
+    constants.instruction_field = "instruction"
+    constants.system_prompt_field = "system_prompt"
 
 
 def get_settings() -> Settings:
standard.py
CHANGED
@@ -276,7 +276,7 @@ class DatasetRecipe(SourceSequentialOperator):
     demos_pool_field_name: str = constants.demos_pool_field
 
     demos_taken_from: str = "train"
-    demos_field: str = "demos"
+    demos_field: str = constants.demos_field
     sampler: Sampler = None
 
     # do not push demos to instances whose "demos" field is already populated
@@ -608,7 +608,7 @@ class DatasetRecipe(SourceSequentialOperator):
             )
         )
         self.verbalization.steps.append(
-            GetLength(field="demos", to_field="recipe_metadata/num_demos")
+            GetLength(field=constants.demos_field, to_field="recipe_metadata/num_demos")
         )
         self.verbalization.steps.append(
             Set(
system_prompts.py
CHANGED
@@ -3,7 +3,9 @@ from typing import Any, Dict, Optional
 
 from .dataclass import NonPositionalField
 from .operator import InstanceOperator
+from .settings_utils import get_constants
 
+constants = get_constants()
 
 class SystemPrompt(InstanceOperator):
     """The role of SystemPrompt is to add task-independent opening-text to every instance."""
@@ -14,10 +16,10 @@ class SystemPrompt(InstanceOperator):
         self, instance: Dict[str, Any], stream_name: Optional[str] = None
     ) -> Dict[str, Any]:
         if self.skip_rendered_instance:
-            if "system_prompt" in instance:
+            if constants.system_prompt_field in instance:
                 return instance
 
-        instance["system_prompt"] = self.get_system_prompt(instance)
+        instance[constants.system_prompt_field] = self.get_system_prompt(instance)
 
         return instance
 
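Behaviorally this file is unchanged; the field name just moves behind constants.system_prompt_field. A toy sketch of the skip_rendered_instance guard it participates in, with a plain string standing in for the constant:

SYSTEM_PROMPT_FIELD = "system_prompt"  # matches constants.system_prompt_field

def add_system_prompt(instance: dict, prompt: str, skip_rendered_instance: bool = True) -> dict:
    # An instance that already carries a system prompt is passed through
    # untouched; otherwise the operator's prompt is attached.
    if skip_rendered_instance and SYSTEM_PROMPT_FIELD in instance:
        return instance
    instance[SYSTEM_PROMPT_FIELD] = prompt
    return instance

print(add_system_prompt({"source": "q"}, "Be concise."))
print(add_system_prompt({"source": "q", "system_prompt": "Custom"}, "Be concise."))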
task.py
CHANGED
@@ -302,9 +302,15 @@ class Task(InstanceOperator, ArtifactFetcherMixin):
             "media": instance.get("media", {}),
             "recipe_metadata": instance.get("recipe_metadata", {}),
         }
-        if "demos" in instance:
+        if constants.demos_field in instance:
             # for the case of recipe.skip_demoed_instances
-            result["demos"] = instance["demos"]
+            result[constants.demos_field] = instance[constants.demos_field]
+
+        if constants.instruction_field in instance:
+            result[constants.instruction_field] = instance[constants.instruction_field]
+
+        if constants.system_prompt_field in instance:
+            result[constants.system_prompt_field] = instance[constants.system_prompt_field]
 
         if stream_name == constants.inference_stream:
             return result
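Beyond the constant rename, the Task change widens the pass-through: instruction and system_prompt set on an incoming instance now survive into the task result alongside demos. A compact sketch of that behavior (build_result is a stand-in for the real method):

PASS_THROUGH = ("demos", "instruction", "system_prompt")

def build_result(instance: dict) -> dict:
    result = {"media": instance.get("media", {})}
    # Per-instance overrides are copied forward only when present.
    for field in PASS_THROUGH:
        if field in instance:
            result[field] = instance[field]
    return result

print(build_result({"media": {}, "instruction": "Answer in one word."}))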
templates.py
CHANGED
@@ -76,9 +76,9 @@ class Template(InstanceOperator):
             self.postprocessors, List[Union[Operator, str]]
         ), f"The template post processors field '{self.postprocessors}' is not a list of processors. Instead it is of type '{to_type_string(type(self.postprocessors))}'."
 
-    def input_fields_to_instruction_and_target_prefix(self, input_fields):
+    def input_fields_to_instruction_and_target_prefix(self, input_fields, instruction):
         instruction = self.apply_formatting(
-            input_fields, "input field", self.instruction, "instruction"
+            input_fields, "input field", instruction, "instruction"
         )
         target_prefix = self.apply_formatting(
             input_fields,
@@ -126,13 +126,13 @@ class Template(InstanceOperator):
 
         source = self.input_fields_to_source(serialized_inputs)
         instruction, target_prefix = self.input_fields_to_instruction_and_target_prefix(
-            serialized_inputs
+            serialized_inputs, instance.get(constants.instruction_field, self.instruction)
         )
 
         result = {
             **instance,
             "source": source,
-            "instruction": instruction,
+            constants.instruction_field: instruction,
             "target_prefix": target_prefix,
             "postprocessors": self.postprocessors,
         }
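The Template change threads a per-instance instruction into formatting: the instance value wins when present, and the template's own instruction remains the fallback. The resolution rule in isolation:

def resolve_instruction(instance: dict, template_instruction: str) -> str:
    # Mirrors instance.get(constants.instruction_field, self.instruction).
    return instance.get("instruction", template_instruction)

print(resolve_instruction({}, "Classify the text."))                             # template default
print(resolve_instruction({"instruction": "Summarize."}, "Classify the text."))  # per-instance override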
text_utils.py
CHANGED
@@ -201,7 +201,7 @@ def construct_dict_as_yaml_lines(d, indent_delta=2) -> List[str]:
     assert (
         indent_delta >= 2
     ), f"Needs at least 2 position indentations, for the case of list elements, that are to be preceded each by ' -'. Got indent_delta={indent_delta}."
-    res = []  # …
+    res = []  # computed hereunder as a list of lines, that are indented only at the end
 
     if isinstance(d, dict):
         if len(d) == 0:
@@ -236,6 +236,72 @@ def construct_dict_as_yaml_lines(d, indent_delta=2) -> List[str]:
             d1 = f'"{d1}"'
         return [d1]
 
+def construct_dict_as_python_lines(d, indent_delta=4) -> List[str]:
+    """Constructs the lines of a dictionary formatted as a piece of python code.
+
+    Args:
+        d: The element to be formatted.
+        indent_delta (int, optional): The amount of spaces to add for each level of indentation. Defaults to 2.
+    """
+    indent_delta_str = " " * indent_delta
+    res = []  # computed hereunder as a list of lines, that are indented only at the end
+
+    if isinstance(d, dict):
+        istype = False
+        if len(d) == 0:
+            return ["{}"]
+        if "__type__" in d:
+            istype = True
+            res = ["__type__" + d["__type__"] + "("]
+            if len(d) == 1:
+                res[0] += ")"
+                return res
+        else:
+            res = ["{"]
+        for key, val in d.items():
+            if key == "__type__":
+                continue
+            printable_key = f'"{key}"' if not istype else key
+            res.append(printable_key + ("=" if istype else ": "))
+            py_for_val = construct_dict_as_python_lines(val, indent_delta=indent_delta)
+            assert len(py_for_val) > 0
+            if len(py_for_val) == 1:
+                res[-1] += py_for_val[0] + ","
+            else:
+                res[-1] += py_for_val[0]
+                if py_for_val[0].startswith("{") or py_for_val[0].startswith("["):
+                    for line in py_for_val[1:-1]:
+                        res.append(indent_delta_str + line)
+                else:
+                    # val is type, its inner lines are already indented
+                    res.extend(py_for_val[1:-1])
+                res.append(py_for_val[-1] + ",")
+        res.append(")" if istype else "}")
+        if istype:
+            for i in range(1, len(res) - 1):
+                res[i] = indent_delta_str + res[i]
+        return res
+
+    if isinstance(d, list):
+        if len(d) == 0:
+            return ["[]"]
+        res = ["["]
+        for val in d:
+            py_for_val = construct_dict_as_python_lines(val, indent_delta=indent_delta)
+            assert len(py_for_val) > 0
+            for line in py_for_val[:-1]:
+                res.append(line)
+            res.append(py_for_val[-1] + ",")
+        res.append("]")
+        return res
+
+    # d1 = re.sub(r"(\n+)", r'"\1"', str(d))
+    if isinstance(d, str):
+        return [f'"{d}"']
+    if d is None or isinstance(d, (int, float, bool)):
+        return [f"{d}"]
+    raise RuntimeError(f"unrecognized value to print as python: {d}")
+
 
 def print_dict(
     d, indent=0, indent_delta=4, max_chars=None, keys_to_print=None, log_level="info"
@@ -246,11 +312,15 @@ def print_dict(
 
 
 def print_dict_as_yaml(d: dict, indent_delta=2) -> str:
-    yaml_lines = construct_dict_as_yaml_lines(d)
+    yaml_lines = construct_dict_as_yaml_lines(d, indent_delta=indent_delta)
     # yaml_lines = [re.sub(r"(\n+)", r'"\1"', line) for line in yaml_lines]
     # yaml_lines = [line.replace("\n", "\\n") for line in yaml_lines]
     return "\n".join(yaml_lines)
 
+def print_dict_as_python(d: dict, indent_delta=4) -> str:
+    py_lines = construct_dict_as_python_lines(d, indent_delta=indent_delta)
+    assert len(py_lines) > 0
+    return "\n".join(py_lines)
 
 def nested_tuple_to_string(nested_tuple: tuple) -> str:
     """Converts a nested tuple to a string, with elements separated by underscores.
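A usage sketch for the new helpers, assuming this module ships as unitxt.text_utils (the "unitxt-img" constant in settings_utils.py suggests the unitxt package):

from unitxt.text_utils import print_dict_as_python

# Plain dicts, lists, strings, numbers, and None are rendered as literals;
# dicts carrying a "__type__" key are rendered as constructor calls instead.
print(print_dict_as_python({"metric": "f1_micro", "labels": ["yes", "no"]}))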
version.py
CHANGED
@@ -1 +1 @@
-version = "1.…"
+version = "1.20.0"
|