File size: 1,374 Bytes
734e414
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# Lookup table from a role label to one of the three canonical role names
# ('system', 'user', 'assistant'). Keys are matched exactly (case-sensitive):
# 'human' folds into 'user'; 'gpt' and 'AI' fold into 'assistant'.
roles_map = dict(
    system='system',
    user='user',
    human='user',
    assistant='assistant',
    gpt='assistant',
    AI='assistant',
)


def _system_prompt_response_messages(r):
    """Turn a row with 'system', 'prompt' and 'response' fields into chat messages.

    Returns a three-element list of ``{'role', 'content'}`` dicts in the order
    system, user, assistant.
    """
    role_to_column = (
        ('system', 'system'),
        ('user', 'prompt'),
        ('assistant', 'response'),
    )
    return [{'role': role, 'content': r[column]} for role, column in role_to_column]


def _reflection_then_output_messages(r):
    """Turn a row with 'system', 'input', 'reflection' and 'output' fields into chat messages.

    The assistant content is the reflection text, a newline, then the output text.
    """
    # The first element of the row's 'system' field is passed through untouched.
    # NOTE(review): presumably it is already a {'role', 'content'} message dict;
    # confirm against the dataset schema.
    leading_message = r['system'][0]
    user_message = {'role': 'user', 'content': r['input']}
    assistant_message = {'role': 'assistant', 'content': r['reflection'] + '\n' + r['output']}
    return [leading_message, user_message, assistant_message]


# Dataset descriptors: each entry carries a 'kind', a dataset 'path', and a
# 'transform' callable that converts one row into a list of chat messages.
pretrain_reflection_datasets = [
    #
    # reflection
    #
    # 4.17 MB, 1,000
    {
        'kind': 'instruct',
        'path': 'dvilasuero/reflection-v1-gpt-4o-judge',
        'transform': _system_prompt_response_messages,
    },
    # 12.4 MB, 3,000
    {
        'kind': 'instruct',
        'path': 'dvilasuero/reflection-v1-openai-o-mini-judge',
        'transform': _system_prompt_response_messages,
    },
    # 70.8 MB, 36,549
    {
        'kind': 'instruct',
        'path': 'dvilasuero/reflection-v1-final-dedup',
        'transform': _system_prompt_response_messages,
    },
    # 30.6 MB, 25,391
    {
        'kind': 'instruct',
        'path': 'flozi00/reflection-qwen2.5-72b-260924',
        'transform': _reflection_then_output_messages,
    },
]