Spaces:
Running
Running
id: "alpaca-to-chatml" | |
title: "Convert Alpaca to Conversation Format" | |
slug: "alpaca-to-chatml" | |
description: "Convert Alpaca format to ChatML Conversation format" | |
code: | | |
-- Convert Alpaca format to Conversation format.
-- Input columns:  instruction, input (optional), output.
-- Output column:  conversation, a two-element list of {from, value} structs
--                 (the user turn first, then the assistant turn).
WITH source_view AS (
    -- Change 'train' to your desired view name here
    SELECT
        instruction,
        input,
        output
    FROM train
)
SELECT
    [
        struct_pack(
            "from" := 'user',
            -- Append the optional input below the instruction, separated by a
            -- blank line. chr(10) yields a real newline character; a plain
            -- '...' literal in DuckDB does not expand backslash escapes, so
            -- writing '\n\n' would emit a literal backslash followed by "n".
            "value" := CASE
                WHEN input IS NOT NULL AND input != ''
                    THEN instruction || chr(10) || chr(10) || input
                ELSE instruction
            END
        ),
        struct_pack(
            "from" := 'assistant',
            "value" := output
        )
    ] AS conversation
FROM source_view
-- A row needs both sides of the exchange to form a conversation.
WHERE instruction IS NOT NULL
    AND output IS NOT NULL;
# Converting Alpaca to ChatML Conversation Format | |
```sql | |
-- Convert Alpaca format to Conversation format.
-- Input columns:  instruction, input (optional), output.
-- Output column:  conversation, a two-element list of {from, value} structs
--                 (the user turn first, then the assistant turn).
WITH source_view AS (
    -- Change 'train' to your desired view name here
    SELECT
        instruction,
        input,
        output
    FROM train
)
SELECT
    [
        struct_pack(
            "from" := 'user',
            -- Append the optional input below the instruction, separated by a
            -- blank line. chr(10) yields a real newline character; a plain
            -- '...' literal in DuckDB does not expand backslash escapes, so
            -- writing '\n\n' would emit a literal backslash followed by "n".
            "value" := CASE
                WHEN input IS NOT NULL AND input != ''
                    THEN instruction || chr(10) || chr(10) || input
                ELSE instruction
            END
        ),
        struct_pack(
            "from" := 'assistant',
            "value" := output
        )
    ] AS conversation
FROM source_view
-- A row needs both sides of the exchange to form a conversation.
WHERE instruction IS NOT NULL
    AND output IS NOT NULL;
``` | |
## Why? | |
Differences between Alpaca and ChatML Conversation Format: | |
1. **Alpaca Format**: | |
- The Alpaca format usually has three columns: `instruction`, `input`, and `output`. | |
2. **ChatML Conversation Format**: | |
- The ChatML Conversation format is a JSON format that contains a list of messages. | |
- Each message has a `from` field, which can be either `system`, `user`, or `assistant`. | |
- The `value` field contains the message content. | |
## Example | |
### `yahma/alpaca-cleaned` | |
<iframe | |
src="https://huggingface.co/datasets/yahma/alpaca-cleaned/embed/viewer/default/train" | |
frameborder="0" | |
width="100%" | |
height="560px" | |
></iframe> | |
You can run this query via the `sql_console` on the Hugging Face Hub [here](https://huggingface.co/datasets/yahma/alpaca-cleaned?row=16&sql_console=true&sql=++++--+Convert+Alpaca+format+to+Conversation+format%0A++++WITH+%0A++++source_view+AS+%28%0A++++++SELECT+*+FROM+train++--+Change+%27train%27+to+your+desired+view+name+here%0A++++%29%0A++++SELECT+%0A++++++%5B%0A++++++++struct_pack%28%0A++++++++++%22from%22+%3A%3D+%27user%27%2C%0A++++++++++%22value%22+%3A%3D+CASE+%0A++++++++++++++++++++++WHEN+input+IS+NOT+NULL+AND+input+%21%3D+%27%27+%0A++++++++++++++++++++++THEN+instruction+%7C%7C+%27%5Cn%5Cn%27+%7C%7C+input%0A++++++++++++++++++++++ELSE+instruction%0A++++++++++++++++++++END%0A++++++++%29%2C%0A++++++++struct_pack%28%0A++++++++++%22from%22+%3A%3D+%27assistant%27%2C%0A++++++++++%22value%22+%3A%3D+output%0A++++++++%29%0A++++++%5D+AS+conversation%0A++++FROM+source_view%0A++++WHERE+instruction+IS+NOT+NULL+%0A++++++AND+output+IS+NOT+NULL%3B).
 |