xiaozaa committed
Commit 1beac4e · 0 parents

first commit
.gitignore ADDED
@@ -0,0 +1,52 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Distribution / packaging
dist/
build/
*.egg-info/

# Virtual environments
venv/
env/
.env/
.venv/

# IDE specific files
.idea/
.vscode/
*.swp
*.swo

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
coverage.xml
*.cover

# Jupyter Notebook
.ipynb_checkpoints

# Local development settings
.env
.env.local

# Logs
*.log

# Database files
*.db
*.sqlite3

# OS generated files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db
README.md ADDED
@@ -0,0 +1,50 @@
# catvton-flux

An advanced virtual try-on solution that combines [CatVTON](https://arxiv.org/abs/2407.15886) (a concatenation-based, in-context virtual try-on approach) with the FLUX.1 Fill inpainting model for realistic and accurate clothing transfer.
## Showcase
| Original | Result |
|----------|--------|
| ![Original](example/person/1.jpg) | ![Result](example/result/1.png) |
| ![Original](example/person/00008_00.jpg) | ![Result](example/result/2.png) |
| ![Original](example/person/00008_00.jpg) | ![Result](example/result/3.png) |

## Model Weights
The model weights are trained on the [VITON-HD](https://github.com/shadow2496/VITON-HD) dataset.
🤗 [catvton-flux-alpha](https://huggingface.co/xiaozaa/catvton-flux-alpha)

## Prerequisites
```bash
conda create -n flux python=3.10
conda activate flux
pip install -r requirements.txt
```

## Usage

```bash
python tryon_inference.py \
  --image ./example/person/00008_00.jpg \
  --mask ./example/person/00008_00_mask.png \
  --garment ./example/garment/00034_00.jpg \
  --seed 42
```

## TODO
- [ ] Release the FID score
- [ ] Add Gradio demo
- [ ] Release updated weights with better performance

## Citation

```bibtex
@misc{chong2024catvton,
      title={CatVTON: Concatenation Is All You Need for Virtual Try-On with Diffusion Models},
      author={Zheng Chong and Xiao Dong and Haoxiang Li and Shiyue Zhang and Wenqing Zhang and Xujie Zhang and Hanqing Zhao and Xiaodan Liang},
      year={2024},
      eprint={2407.15886},
      archivePrefix={arXiv},
      primaryClass={cs.CV}
}
```

## License
- The code is licensed under the MIT License.
- The model weights are subject to the same licenses as FLUX.1 Fill and VITON-HD.
example/garment/00034_00.jpg ADDED
example/garment/00035_00.jpg ADDED
example/garment/04564_00.jpg ADDED
example/person/00008_00.jpg ADDED
example/person/00008_00_mask.png ADDED
example/person/1.jpg ADDED
example/person/1_mask.png ADDED
example/result/1.png ADDED
example/result/2.png ADDED
example/result/3.png ADDED
requirements.txt ADDED
@@ -0,0 +1,98 @@
accelerate==0.30.1
aiohappyeyeballs==2.3.5
aiohttp==3.10.3
aiosignal==1.3.1
annotated-types==0.7.0
antlr4-python3-runtime==4.9.3
attrs==24.2.0
certifi==2024.7.4
charset-normalizer==3.3.2
click==8.1.7
coloredlogs==15.0.1
contourpy==1.2.1
cycler==0.12.1
datasets==2.21.0
deepspeed==0.14.4
dill==0.3.8
docker-pycreds==0.4.0
einops==0.8.0
filelock==3.15.4
flatbuffers==24.3.25
fonttools==4.53.1
frozenlist==1.4.1
fsspec==2024.6.1
gitdb==4.0.11
GitPython==3.1.43
hjson==3.1.0
huggingface-hub==0.24.5
humanfriendly==10.0
idna==3.7
importlib_metadata==8.2.0
Jinja2==3.1.4
kiwisolver==1.4.5
MarkupSafe==2.1.5
matplotlib==3.9.2
mpmath==1.3.0
multidict==6.0.5
multiprocess==0.70.16
networkx==3.3
ninja==1.11.1.1
numpy==1.26.4
nvidia-cublas-cu12==12.1.3.1
nvidia-cuda-cupti-cu12==12.1.105
nvidia-cuda-nvrtc-cu12==12.1.105
nvidia-cuda-runtime-cu12==12.1.105
nvidia-cudnn-cu12==9.1.0.70
nvidia-cufft-cu12==11.0.2.54
nvidia-curand-cu12==10.3.2.106
nvidia-cusolver-cu12==11.4.5.107
nvidia-cusparse-cu12==12.1.0.106
nvidia-ml-py==12.555.43
nvidia-nccl-cu12==2.20.5
nvidia-nvjitlink-cu12==12.6.20
nvidia-nvtx-cu12==12.1.105
omegaconf==2.3.0
onnxruntime-gpu==1.18.1
opencv-python==4.10.0.84
optimum-quanto==0.2.4
packaging==24.1
pandas==2.2.2
pillow==10.4.0
platformdirs==4.2.2
protobuf==5.27.3
psutil==6.0.0
py-cpuinfo==9.0.0
pyarrow==17.0.0
pydantic==2.8.2
pydantic_core==2.20.1
pyparsing==3.1.2
python-dateutil==2.9.0.post0
pytz==2024.1
PyYAML==6.0.2
regex==2024.7.24
requests==2.32.3
safetensors==0.4.4
sentencepiece==0.2.0
sentry-sdk==2.13.0
setproctitle==1.3.3
six==1.16.0
smmap==5.0.1
sympy==1.13.2
timm==1.0.8
tokenizers==0.19.1
torch==2.4.0
torchvision==0.19.0
tqdm==4.66.5
transformers==4.43.3
triton==3.0.0
typing_extensions==4.12.2
tzdata==2024.1
urllib3==2.2.2
wandb==0.17.6
xxhash==3.4.1
yarl==1.9.4
zipp==3.20.0
peft==0.13.2
bitsandbytes==0.44.1
prodigyopt
git+https://github.com/huggingface/diffusers.git
tryon_inference.py ADDED
@@ -0,0 +1,118 @@
import argparse
import torch
from diffusers.utils import load_image, check_min_version
from diffusers import FluxFillPipeline, FluxTransformer2DModel
from torchvision import transforms

def run_inference(
    image_path,
    mask_path,
    garment_path,
    output_garment_path=None,
    output_tryon_path='flux_inpaint_tryon.png',
    size=(576, 768),
    num_steps=50,
    guidance_scale=30,
    seed=42,
    pipe=None
):
    # Build the pipeline unless the caller passes one in (lets callers reuse it)
    if pipe is None:
        transformer = FluxTransformer2DModel.from_pretrained(
            "xiaozaa/catvton-flux-alpha",
            torch_dtype=torch.bfloat16
        )
        pipe = FluxFillPipeline.from_pretrained(
            "black-forest-labs/FLUX.1-dev",
            transformer=transformer,
            torch_dtype=torch.bfloat16
        ).to("cuda")
    else:
        pipe.to("cuda")

    pipe.transformer.to(torch.bfloat16)

    # Person/garment images are normalized to [-1, 1]; the mask stays in [0, 1]
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5])
    ])
    mask_transform = transforms.Compose([
        transforms.ToTensor()
    ])

    # Load and resize all inputs to a common size
    image = load_image(image_path).convert("RGB").resize(size)
    mask = load_image(mask_path).convert("RGB").resize(size)
    garment = load_image(garment_path).convert("RGB").resize(size)

    image_tensor = transform(image)
    mask_tensor = mask_transform(mask)[:1]  # Take only the first channel
    garment_tensor = transform(garment)

    # Concatenate garment and person along the width; mask only the person half
    # so the garment half is preserved as conditioning
    inpaint_image = torch.cat([garment_tensor, image_tensor], dim=2)
    garment_mask = torch.zeros_like(mask_tensor)
    extended_mask = torch.cat([garment_mask, mask_tensor], dim=2)

    prompt = "The pair of images highlights a clothing and its styling on a model, high resolution, 4K, 8K; " \
             "[IMAGE1] Detailed product shot of a clothing; " \
             "[IMAGE2] The same cloth is worn by a model in a lifestyle setting."

    generator = torch.Generator(device="cuda").manual_seed(seed)

    result = pipe(
        height=size[1],
        width=size[0] * 2,  # Double width: garment on the left, try-on on the right
        image=inpaint_image,
        mask_image=extended_mask,
        num_inference_steps=num_steps,
        generator=generator,
        max_sequence_length=512,
        guidance_scale=guidance_scale,
        prompt=prompt,
    ).images[0]

    # Split the side-by-side output back into its two halves and save them
    width = size[0]
    garment_result = result.crop((0, 0, width, size[1]))
    tryon_result = result.crop((width, 0, width * 2, size[1]))

    if output_garment_path is not None:
        garment_result.save(output_garment_path)
    tryon_result.save(output_tryon_path)
    return garment_result, tryon_result

def main():
    parser = argparse.ArgumentParser(description='Run FLUX virtual try-on inference')
    parser.add_argument('--image', required=True, help='Path to the model image')
    parser.add_argument('--mask', required=True, help='Path to the agnostic mask')
    parser.add_argument('--garment', required=True, help='Path to the garment image')
    parser.add_argument('--output-garment', default='flux_inpaint_garment.png', help='Output path for garment result')
    parser.add_argument('--output-tryon', default='flux_inpaint_tryon.png', help='Output path for try-on result')
    parser.add_argument('--steps', type=int, default=50, help='Number of inference steps')
    parser.add_argument('--guidance-scale', type=float, default=30, help='Guidance scale')
    parser.add_argument('--seed', type=int, default=0, help='Random seed')

    args = parser.parse_args()

    check_min_version("0.30.2")

    garment_result, tryon_result = run_inference(
        image_path=args.image,
        mask_path=args.mask,
        garment_path=args.garment,
        output_garment_path=args.output_garment,
        output_tryon_path=args.output_tryon,
        num_steps=args.steps,
        guidance_scale=args.guidance_scale,
        seed=args.seed
    )
    print("Successfully saved garment and try-on images")

if __name__ == "__main__":
    main()