Spaces:
Runtime error
Runtime error
#!/usr/bin/env python
# coding=utf-8
"""
Commonly used constants.
"""
# Short description of the "text_only" dataset format. The text is assembled
# from adjacent string literals, one per output line; every line, including
# the last one, is terminated by a newline and the text has no leading newline.
TEXT_ONLY_DATASET_DESCRIPTION = (
    '"text_only": a dataset with only raw text instances,'
    ' with following format:\n'
    '{\n'
    '"type": "text_only",\n'
    '"instances": [\n'
    '{ "text": "TEXT_1" },\n'
    '{ "text": "TEXT_2" },\n'
    '...\n'
    ']\n'
    '}\n'
)
# Usage examples for the "text_only" dataset type, written as Markdown with
# fenced Python snippets: the first builds a Dataset from a dict, the second
# writes the same dict to a JSON file and loads it back.
# The snippets are meant to be copy-pasted, so they must stay valid Python:
# the class used must match the one imported (`DatasetArguments`) and the body
# of the `with` statement must be indented.
# The trailing lstrip("\n") drops the newline that follows the opening triple
# quote, so the text starts directly with "For example,".
TEXT_ONLY_DATASET_DETAILS = (
"""
For example,
```python
from lmflow.datasets import Dataset
data_dict = {
"type": "text_only",
"instances": [
{ "text": "Human: Hello. Bot: Hi!" },
{ "text": "Human: How are you today? Bot: Fine, thank you!" },
]
}
dataset = Dataset.create_from_dict(data_dict)
```
You may also save the corresponding format to json,
```python
import json
from lmflow.args import DatasetArguments
from lmflow.datasets import Dataset
data_dict = {
"type": "text_only",
"instances": [
{ "text": "Human: Hello. Bot: Hi!" },
{ "text": "Human: How are you today? Bot: Fine, thank you!" },
]
}
with open("data.json", "w") as fout:
    json.dump(data_dict, fout)
data_args = DatasetArguments(dataset_path="data.json")
dataset = Dataset(data_args)
new_data_dict = dataset.to_dict()
# `new_data_dict` Should have the same content as `data_dict`
```
"""
).lstrip("\n")
# Short description of the "text2text" dataset format. The text is assembled
# from adjacent string literals, one per output line; every line, including
# the last one, is terminated by a newline and the text has no leading newline.
TEXT2TEXT_DATASET_DESCRIPTION = (
    '"text2text": a dataset with input & output instances,'
    ' with following format:\n'
    '{\n'
    '"type": "text2text",\n'
    '"instances": [\n'
    '{ "input": "INPUT_1", "output": "OUTPUT_1" },\n'
    '{ "input": "INPUT_2", "output": "OUTPUT_2" },\n'
    '...\n'
    ']\n'
    '}\n'
)
# Usage examples for the "text2text" dataset type, written as Markdown with
# fenced Python snippets: the first builds a Dataset from a dict, the second
# writes the same dict to a JSON file and loads it back.
# The snippets are meant to be copy-pasted, so they must stay valid Python:
# the class used must match the one imported (`DatasetArguments`) and the body
# of the `with` statement must be indented.
# The trailing lstrip("\n") drops the newline that follows the opening triple
# quote, so the text starts directly with "For example,".
TEXT2TEXT_DATASET_DETAILS = (
"""
For example,
```python
from lmflow.datasets import Dataset
data_dict = {
"type": "text2text",
"instances": [
{
"input": "Human: Hello.",
"output": "Bot: Hi!",
},
{
"input": "Human: How are you today?",
"output": "Bot: Fine, thank you! And you?",
}
]
}
dataset = Dataset.create_from_dict(data_dict)
```
You may also save the corresponding format to json,
```python
import json
from lmflow.args import DatasetArguments
from lmflow.datasets import Dataset
data_dict = {
"type": "text2text",
"instances": [
{
"input": "Human: Hello.",
"output": "Bot: Hi!",
},
{
"input": "Human: How are you today?",
"output": "Bot: Fine, thank you! And you?",
}
]
}
with open("data.json", "w") as fout:
    json.dump(data_dict, fout)
data_args = DatasetArguments(dataset_path="data.json")
dataset = Dataset(data_args)
new_data_dict = dataset.to_dict()
# `new_data_dict` Should have the same content as `data_dict`
```
"""
).lstrip("\n")
# Full help text per dataset type: the short format description immediately
# followed by the usage examples.
TEXT_ONLY_DATASET_LONG_DESCRIPTION = (
    TEXT_ONLY_DATASET_DESCRIPTION + TEXT_ONLY_DATASET_DETAILS
)
TEXT2TEXT_DATASET_LONG_DESCRIPTION = (
    TEXT2TEXT_DATASET_DESCRIPTION + TEXT2TEXT_DATASET_DETAILS
)

# The original names were misspelled ("DESCRITION"). They are kept as aliases
# of the correctly spelled constants so existing imports keep working.
TEXT_ONLY_DATASET_LONG_DESCRITION = TEXT_ONLY_DATASET_LONG_DESCRIPTION
TEXT2TEXT_DATASET_LONG_DESCRITION = TEXT2TEXT_DATASET_LONG_DESCRIPTION