# NOTE: residue from the hosting page header, kept for reference:
#   Spaces: Runtime error / Runtime error
"""Build a ``metadata.jsonl`` index for a folder of image/caption pairs.

Each ``<stem>.txt`` file directly inside ``--path`` is treated as the caption
of the non-``.txt`` file in the same folder that shares its stem.  For every
such pair one JSON object is written, on its own line, to
``<path>/metadata.jsonl`` with two keys: ``file_name`` (the image file name)
and the caption column name (``--caption_column``, default ``prompt``).
"""

import argparse
import json
import pathlib

# Name of the index file written into the target folder.
METADATA_FILENAME = "metadata.jsonl"


def _parse_args(argv=None):
    """Parse command-line options; ``argv=None`` means use ``sys.argv[1:]``."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--path",
        type=str,
        required=True,
        help="Path to folder with image-text pairs.",
    )
    parser.add_argument("--caption_column", type=str, default="prompt", help="Name of caption column.")
    return parser.parse_args(argv)


def _pair_captions_with_images(folder: pathlib.Path) -> list:
    """Return ``(caption_path, image_path)`` tuples sorted by caption path.

    Only regular files directly inside *folder* are considered.  Captions
    without a matching image are skipped.  When several non-caption files
    share a stem, the first one in sorted order is used so the result does
    not depend on filesystem listing order.
    """
    captions = sorted(entry for entry in folder.glob("*.txt") if entry.is_file())
    caption_set = set(captions)

    images_by_stem = {}
    for candidate in sorted(folder.glob("*")):
        if candidate in caption_set or not candidate.is_file():
            continue
        # A metadata file left over from a previous run is output, not an image.
        if candidate.name == METADATA_FILENAME:
            continue
        images_by_stem.setdefault(candidate.stem, candidate)

    return [
        (caption, images_by_stem[caption.stem])
        for caption in captions
        if caption.stem in images_by_stem
    ]


def main(argv=None) -> None:
    """Write ``metadata.jsonl`` for the folder given by ``--path``.

    Args:
        argv: Optional list of command-line arguments; defaults to
            ``sys.argv[1:]`` when ``None``.

    Raises:
        RuntimeError: If ``--path`` does not exist or is not a directory.
    """
    args = _parse_args(argv)

    path = pathlib.Path(args.path)
    if not path.exists():
        raise RuntimeError(f"`--path` '{args.path}' does not exist.")
    if not path.is_dir():
        raise RuntimeError(f"`--path` '{args.path}' is not a directory.")

    # Resolve the pairs before opening the output file for writing.
    pairs = _pair_captions_with_images(path)

    metadata = path.joinpath(METADATA_FILENAME)
    with metadata.open("w", encoding="utf-8") as f:
        for caption, image in pairs:
            caption_text = caption.read_text(encoding="utf-8")
            json.dump({"file_name": image.name, args.caption_column: caption_text}, f)
            f.write("\n")


if __name__ == "__main__":
    main()