Spaces:
Running
Running
File size: 890 Bytes
f52daa3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
# text_to_training_csv.py
import sys
import csv
def _iter_fixed_chunks(stream, chunk_size):
    """Yield consecutive pieces of *stream*, each ``chunk_size`` characters long.

    Only the final piece may be shorter. Data is re-buffered between reads
    so that a read returning fewer than ``chunk_size`` characters before
    end of input does not produce a short piece in the middle of the output.
    """
    pending = ""
    while True:
        piece = stream.read(chunk_size)
        if not piece:
            break
        pending += piece
        while len(pending) >= chunk_size:
            yield pending[:chunk_size]
            pending = pending[chunk_size:]
    # Whatever is left at end of input is shorter than chunk_size.
    if pending:
        yield pending


def main():
    """Split text from stdin into fixed-size chunks and write them as CSV.

    The chunk size (in characters) is taken from the first command-line
    argument. A header row ``id,text`` is written to stdout, followed by
    one row per chunk with ids counting up from 1.

    Exits with status 1 (after printing a message to stderr) when the
    argument is missing, is not an integer, or is not positive.
    """
    if len(sys.argv) < 2:
        print("Usage: python text_to_training_csv.py <chunk_size>", file=sys.stderr)
        sys.exit(1)

    try:
        chunk_size = int(sys.argv[1])
    except ValueError:
        print("Error: Chunk size must be an integer.", file=sys.stderr)
        sys.exit(1)

    # read(0) returns '' immediately (all input would be dropped) and a
    # negative size reads everything and then slices with negative bounds,
    # so anything below 1 is rejected up front.
    if chunk_size < 1:
        print("Error: Chunk size must be a positive integer.", file=sys.stderr)
        sys.exit(1)

    writer = csv.writer(sys.stdout)
    writer.writerow(["id", "text"])
    for row_id, text in enumerate(_iter_fixed_chunks(sys.stdin, chunk_size), start=1):
        writer.writerow([row_id, text])
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|