Spaces:
Running
Running
# text_to_training_csv.py
import sys | |
import csv | |
def _iter_chunks(stream, chunk_size):
    """Yield successive pieces of *stream*, each ``chunk_size`` characters long.

    Every yielded string has exactly ``chunk_size`` characters except
    possibly the last one, which holds whatever remains at end of input.
    ``chunk_size`` must be positive.
    """
    pending = ""
    while True:
        piece = stream.read(chunk_size)
        if not piece:
            break
        pending += piece
        # A read may hand back fewer characters than requested, so only
        # emit once a full chunk has accumulated and carry the rest over.
        while len(pending) >= chunk_size:
            yield pending[:chunk_size]
            pending = pending[chunk_size:]
    # Flush the final, shorter-than-chunk_size tail (if any).
    if pending:
        yield pending


def main():
    """Split text from stdin into fixed-size chunks and write them as CSV.

    Reads the chunk size (in characters) from ``sys.argv[1]``, consumes
    ``sys.stdin`` and writes a CSV with the header ``id,text`` to
    ``sys.stdout``; ids start at 1 and increase by one per chunk.

    Exits with status 1 (message on stderr, nothing on stdout) when the
    argument is missing, is not an integer, or is not strictly positive.
    """
    if len(sys.argv) < 2:
        print("Usage: python text_to_training_csv.py <chunk_size>", file=sys.stderr)
        sys.exit(1)

    try:
        chunk_size = int(sys.argv[1])
    except ValueError:
        print("Error: Chunk size must be an integer.", file=sys.stderr)
        sys.exit(1)

    # A size of 0 makes read() return '' immediately (all input silently
    # dropped) and a negative size makes read() slurp everything and the
    # slicing split from the wrong end, so reject both up front.
    if chunk_size <= 0:
        print("Error: Chunk size must be a positive integer.", file=sys.stderr)
        sys.exit(1)

    # NOTE(review): the csv docs recommend newline='' on the output file;
    # on platforms where stdout translates "\n" this may produce "\r\r\n"
    # row endings -- confirm if Windows is a target.
    writer = csv.writer(sys.stdout)
    writer.writerow(["id", "text"])
    for row_id, text in enumerate(_iter_chunks(sys.stdin, chunk_size), start=1):
        writer.writerow([row_id, text])
# Run the converter only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()