# gradio_test / testToTraining.py
# ajsbsd's picture
# qwen.ai helper
# f52daa3
# raw
# history blame contribute delete
# 890 Bytes
# text_to_training_csv.py
import sys
import csv
def main():
    """Split text from stdin into fixed-size chunks and emit them as CSV.

    The chunk size is read from the first command-line argument. Output goes
    to stdout: an ``id,text`` header row, then one row per chunk with ids
    counting up from 1. Every row holds ``chunk_size`` characters of input
    except possibly the last one, which holds whatever is left over.

    Exits with status 1 (and a message on stderr) when the argument is
    missing, is not an integer, or is not a positive integer.
    """
    if len(sys.argv) < 2:
        print("Usage: python text_to_training_csv.py <chunk_size>", file=sys.stderr)
        sys.exit(1)

    try:
        chunk_size = int(sys.argv[1])
    except ValueError:
        print("Error: Chunk size must be an integer.", file=sys.stderr)
        sys.exit(1)

    # A size of 0 makes read() return '' immediately, silently dropping all
    # input; a negative size makes read() consume everything at once and the
    # slicing below then cuts from the wrong end. Reject both up front.
    if chunk_size <= 0:
        print("Error: Chunk size must be a positive integer.", file=sys.stderr)
        sys.exit(1)

    writer = csv.writer(sys.stdout)
    writer.writerow(["id", "text"])

    id_counter = 1
    buffer = ''
    while True:
        chunk = sys.stdin.read(chunk_size)
        if not chunk:
            break
        buffer += chunk
        # Drain every complete chunk currently buffered, so nothing larger
        # than chunk_size can ever end up in the trailing row.
        while len(buffer) >= chunk_size:
            writer.writerow([id_counter, buffer[:chunk_size]])
            id_counter += 1
            buffer = buffer[chunk_size:]

    # Write the remaining partial chunk, if any.
    if buffer:
        writer.writerow([id_counter, buffer])
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()