File size: 890 Bytes
f52daa3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# text_to_training_csv.py

import sys
import csv

def main():
    """Split text from stdin into fixed-size chunks and emit them as CSV.

    The chunk size (in characters) is taken from the first command-line
    argument.  A header row ``id,text`` is written to stdout, followed by
    one row per chunk; ids start at 1 and the last row may be shorter than
    the chunk size.

    Exits with status 1, after printing a message to stderr, when the
    argument is missing, is not an integer, or is not positive.
    """
    if len(sys.argv) < 2:
        print("Usage: python text_to_training_csv.py <chunk_size>", file=sys.stderr)
        sys.exit(1)

    try:
        chunk_size = int(sys.argv[1])
    except ValueError:
        print("Error: Chunk size must be an integer.", file=sys.stderr)
        sys.exit(1)

    # Reject non-positive sizes before producing any output: read(0) returns
    # '' immediately (all input would be silently dropped), and a negative
    # size makes read() consume everything while the slices below would
    # split the text at the wrong position.
    if chunk_size <= 0:
        print("Error: Chunk size must be a positive integer.", file=sys.stderr)
        sys.exit(1)

    writer = csv.writer(sys.stdout)
    writer.writerow(["id", "text"])

    id_counter = 1
    buffer = ''

    while True:
        chunk = sys.stdin.read(chunk_size)
        if not chunk:
            break
        buffer += chunk

        # Flush every complete chunk currently buffered; a short read simply
        # leaves its data in the buffer until more input arrives.
        while len(buffer) >= chunk_size:
            writer.writerow([id_counter, buffer[:chunk_size]])
            id_counter += 1
            buffer = buffer[chunk_size:]

    # Write whatever is left as a final, shorter row.
    if buffer:
        writer.writerow([id_counter, buffer])

# Run the converter only when this file is executed as a script, so that
# importing the module does not start reading from stdin.
if __name__ == "__main__":
    main()