Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """ | |
| Sample Dataset Creation Script | |
| Creates sample datasets for testing SmolLM3 fine-tuning | |
| """ | |
| import os | |
| import json | |
| import argparse | |
| from data import create_sample_dataset | |
def main(argv=None):
    """Create a sample dataset and print a short summary of the result.

    Parses the command-line options, calls ``create_sample_dataset`` with the
    requested output directory, prints the ``train.json`` and
    ``validation.json`` paths under the returned directory, and pretty-prints
    the first record of ``train.json`` when one is available.

    Args:
        argv: Optional list of argument strings to parse instead of
            ``sys.argv[1:]``. ``None`` (the default) preserves the original
            command-line behaviour.

    Raises:
        SystemExit: Raised by ``argparse`` for ``--help`` or invalid options.
        OSError: If ``train.json`` cannot be opened.
        json.JSONDecodeError: If ``train.json`` is not valid JSON.
    """
    parser = argparse.ArgumentParser(
        description='Create sample dataset for SmolLM3 fine-tuning'
    )
    parser.add_argument(
        '--output_dir', type=str, default='my_dataset',
        help='Output directory for the dataset',
    )
    parser.add_argument(
        '--format', type=str, default='chat',
        choices=['chat', 'instruction', 'user_assistant'],
        help='Dataset format',
    )
    parser.add_argument(
        '--num_samples', type=int, default=100,
        help='Number of samples to create',
    )
    args = parser.parse_args(argv)

    # NOTE(review): only --output_dir is forwarded to create_sample_dataset;
    # --format and --num_samples are merely echoed below. Confirm against the
    # helper's signature in data.py whether they should be passed through.
    output_path = create_sample_dataset(args.output_dir)

    train_path = os.path.join(output_path, 'train.json')
    validation_path = os.path.join(output_path, 'validation.json')

    print(f"Sample dataset created in: {output_path}")
    print(f"Format: {args.format}")
    print(f"Samples: {args.num_samples}")
    print("\nFiles created:")
    print(f"- {train_path}")
    print(f"- {validation_path}")

    # Show sample data. JSON text is read as UTF-8 explicitly so the result
    # does not depend on the platform's locale encoding.
    with open(train_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    print("\nSample data:")
    # Guard the preview: an empty or non-list payload would otherwise raise
    # on data[0] after the dataset has already been written successfully.
    if isinstance(data, list) and data:
        print(json.dumps(data[0], indent=2))
    else:
        print("(no sample records found in train.json)")
# Run the CLI only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()