Spaces:

yaseengoldfinchpc
/

modeltest

Sleeping

App Files Files Community

yaseengoldfinchpc committed on Jan 18

Commit

43c5517

1 Parent(s): 3beeaa7

Git Push

Browse files

Files changed (16) hide show

.env.example +2 -0
.gitignore +13 -0
Dockerfile +26 -0
README.md +8 -11
app.py +108 -0
config.py +8 -0
convertToOnx.py +115 -0
example_client.py +29 -0
generated_mask.png +0 -0
generated_mask_1.png +0 -0
inpainted_result.png +0 -0
model_index.json +7 -0
requirements.txt +11 -0
test.py +92 -0
test2.py +101 -0
test_app.py +10 -0

.env.example ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ HF_TOKEN=your_huggingface_token_here
2	+ MAX_FILE_SIZE=10485760 # 10MB in bytes

.gitignore ADDED Viewed

	@@ -0,0 +1,13 @@

+*.safetensors
+__pycache__/
+*.pyc
+.env
+onnx_output/*
+OutPutModel/*
+.pytest_cache/
+.coverage
+htmlcov/
+.venv/
+.idea/
+.vscode/
+*.log

Dockerfile ADDED Viewed

	@@ -0,0 +1,26 @@

+# CUDA 11.8 runtime image on Ubuntu 22.04; Python is installed below.
+FROM nvidia/cuda:11.8.0-runtime-ubuntu22.04
+# Add non-root user (UID 1000) that the server process will run as
+RUN useradd -m -u 1000 user
+WORKDIR /home/user/app
+# Install Python and pip, then drop the apt package lists from the layer
+RUN apt-get update && apt-get install -y \
+    python3 \
+    python3-pip \
+    && rm -rf /var/lib/apt/lists/*
+# Copy requirements first for better caching
+COPY --chown=user:user requirements.txt .
+# Runs before the USER switch below, i.e. packages are installed as root
+RUN pip3 install --no-cache-dir -r requirements.txt
+# Copy application code
+COPY --chown=user:user . .
+# Switch to non-root user
+USER user
+# Expose port (same value as app_port in README.md)
+EXPOSE 7860
+# NOTE(review): a single worker presumably avoids loading one model copy
+# per worker process - confirm before raising the worker count.
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]

README.md CHANGED Viewed

@@ -1,11 +1,8 @@
----
-title: Modeltest
-emoji: 🌍
-colorFrom: indigo
-colorTo: purple
-sdk: docker
-pinned: false
-short_description: ModelTes
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+---
+title: Stable Diffusion Inpainting API
+emoji: 🎨
+colorFrom: blue
+colorTo: purple
+sdk: docker
+app_port: 7860
+---

app.py ADDED Viewed

	@@ -0,0 +1,108 @@

+from fastapi import FastAPI, File, UploadFile, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+import torch
+from PIL import Image
+import io
+import base64
+from diffusers import StableDiffusionInpaintPipeline
+import gc
+from fastapi.responses import JSONResponse
+import logging
+app = FastAPI()
+# Add CORS middleware
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Global variable for the model
+pipe = None
+def load_model():
+    global pipe
+    if pipe is None:
+        # Use the pre-uploaded model from Hugging Face
+        model_id = "Uminosachi/realisticVisionV51_v51VAE-inpainting"
+        pipe = StableDiffusionInpaintPipeline.from_pretrained(
+            model_id,
+            torch_dtype=torch.float16,
+            safety_checker=None
+        ).to("cuda")
+        pipe.enable_attention_slicing(slice_size="max")
+        pipe.enable_sequential_cpu_offload()
+    return pipe
+@app.on_event("startup")
+async def startup_event():
+    if torch.cuda.is_available():
+        load_model()
+def image_to_base64(image: Image.Image) -> str:
+    buffered = io.BytesIO()
+    image.save(buffered, format="PNG")
+    return base64.b64encode(buffered.getvalue()).decode()
+@app.post("/inpaint")
+async def inpaint(
+    image: UploadFile = File(...),
+    mask: UploadFile = File(...),
+    prompt: str = "add some flowers and a fountain",
+    negative_prompt: str = "blurry, low quality, distorted"
+):
+    try:
+        # Add file size check (10MB limit)
+        max_size = 10 * 1024 * 1024  # 10MB
+        if len(await image.read()) > max_size or len(await mask.read()) > max_size:
+            return JSONResponse(
+                status_code=400,
+                content={"error": "File size too large. Maximum size is 10MB"}
+            )
+        # Reset file positions
+        await image.seek(0)
+        await mask.seek(0)
+        # Read and process input image
+        image_data = await image.read()
+        mask_data = await mask.read()
+        original_image = Image.open(io.BytesIO(image_data))
+        mask_image = Image.open(io.BytesIO(mask_data))
+        # Resize to multiple of 8
+        width, height = (dim - dim % 8 for dim in original_image.size)
+        original_image = original_image.resize((width, height))
+        mask_image = mask_image.resize((width, height))
+        mask_image = mask_image.convert("L")
+        # Perform inpainting
+        with torch.cuda.amp.autocast():
+            output_image = pipe(
+                prompt=prompt,
+                negative_prompt=negative_prompt,
+                image=original_image,
+                mask_image=mask_image,
+                num_inference_steps=20,
+                guidance_scale=7.5,
+            ).images[0]
+        # Convert output image to base64
+        output_base64 = image_to_base64(output_image)
+        # Clean up
+        torch.cuda.empty_cache()
+        gc.collect()
+        return {"status": "success", "image": output_base64}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.get("/health")
+async def health_check():
+    return {"status": "healthy", "cuda_available": torch.cuda.is_available()}

config.py ADDED Viewed

	@@ -0,0 +1,8 @@

+import logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)

convertToOnx.py ADDED Viewed

	@@ -0,0 +1,115 @@

+import torch
+from diffusers import StableDiffusionInpaintPipeline
+import os
+def convert_to_onnx(model_path, output_dir):
+    """Export the UNet, VAE decoder and text encoder of a pipeline to ONNX.
+
+    The pipeline is loaded from ``model_path`` with ``from_single_file``,
+    moved to CPU/float32, and each sub-model is exported with
+    ``torch.onnx.export`` using dummy inputs.  Three files are written to
+    ``output_dir``: ``unet.onnx``, ``vae_decoder.onnx`` and
+    ``text_encoder.onnx``.
+
+    Args:
+        model_path: Path to the checkpoint passed to ``from_single_file``.
+        output_dir: Directory for the ONNX files; created if missing.
+
+    Returns:
+        True once all three exports have finished.
+    """
+    os.makedirs(output_dir, exist_ok=True)
+    # Load the pipeline
+    pipe = StableDiffusionInpaintPipeline.from_single_file(
+        model_path
+    )
+    # Move to CPU and ensure float32
+    pipe = pipe.to("cpu")
+    pipe.to(torch.float32)
+    # Set to evaluation mode
+    pipe.unet.eval()
+    pipe.vae.eval()
+    pipe.text_encoder.eval()
+    # Build dummy inputs and export each sub-model.  Nothing is encoded
+    # through the VAE here; the latent size below is hard-coded.
+    with torch.no_grad():
+        # Sample random latent in correct shape
+        latent_height = 64  # standard height for SD latents
+        latent_width = 64   # standard width for SD latents
+        # Create sample inputs for UNet
+        # The UNet expects concatenated latent + mask channels
+        latents = torch.randn(1, 4, latent_height, latent_width, dtype=torch.float32)
+        mask = torch.ones(1, 1, latent_height, latent_width, dtype=torch.float32)
+        masked_image_latents = torch.randn(1, 4, latent_height, latent_width, dtype=torch.float32)
+        masked_latents = torch.cat([latents, masked_image_latents, mask], dim=1)  # 4 + 4 + 1 = 9 channels
+        # Time embeddings
+        timestep = torch.tensor([1], dtype=torch.int64)
+        # Text embeddings (77 is the standard sequence length)
+        text_embeddings = torch.randn(1, 77, 768, dtype=torch.float32)
+        # NOTE(review): this overrides the attention scale of the FIRST
+        # text-encoder layer only, replacing it with a tensor.  Presumably an
+        # export workaround - confirm why it is needed and why only layer 0.
+        # It runs before all three exports, including the text encoder's.
+        pipe.text_encoder.text_model.encoder.layers[0].self_attn.scale = torch.tensor(0.125, dtype=torch.float32)
+        # Export UNet (batch, height and width are declared dynamic)
+        torch.onnx.export(
+            pipe.unet,
+            args=(masked_latents, timestep, text_embeddings),
+            f=f"{output_dir}/unet.onnx",
+            input_names=["sample", "timestep", "encoder_hidden_states"],
+            output_names=["out_sample"],
+            dynamic_axes={
+                "sample": {0: "batch", 2: "height", 3: "width"},
+                "encoder_hidden_states": {0: "batch", 1: "sequence"},
+                "out_sample": {0: "batch", 2: "height", 3: "width"}
+            },
+            opset_version=17,
+            export_params=True
+        )
+        # Export VAE Decoder (only the decoder; the encoder is not exported)
+        vae_latent = torch.randn(1, 4, latent_height, latent_width, dtype=torch.float32)
+        torch.onnx.export(
+            pipe.vae.decoder,
+            args=(vae_latent,),
+            f=f"{output_dir}/vae_decoder.onnx",
+            input_names=["latent"],
+            output_names=["image"],
+            dynamic_axes={
+                "latent": {0: "batch", 2: "height", 3: "width"},
+                "image": {0: "batch", 2: "height", 3: "width"}
+            },
+            opset_version=17,
+            export_params=True
+        )
+        # Export Text Encoder (only the batch axis is dynamic; length stays 77)
+        input_ids = torch.ones(1, 77, dtype=torch.int64)
+        torch.onnx.export(
+            pipe.text_encoder,
+            args=(input_ids,),
+            f=f"{output_dir}/text_encoder.onnx",
+            input_names=["input_ids"],
+            output_names=["last_hidden_state", "pooler_output"],
+            dynamic_axes={
+                "input_ids": {0: "batch"},
+                "last_hidden_state": {0: "batch"},
+                "pooler_output": {0: "batch"}
+            },
+            opset_version=17,
+            export_params=True
+        )
+    print("Conversion completed successfully!")
+    return True
+def verify_paths(model_path):
+    if not os.path.exists(model_path):
+        raise FileNotFoundError(f"Model file not found at: {model_path}")
+    print(f"Model file found at: {model_path}")
+    return True
+if __name__ == "__main__":
+    # Set your paths here
+    model_path = "realisticVisionV60B1_v51VAE-inpainting.safetensors"
+    output_dir = "onnx_output"
+    try:
+        verify_paths(model_path)
+        success = convert_to_onnx(model_path, output_dir)
+        if success:
+            print(f"ONNX models saved to: {output_dir}")
+    except Exception as e:
+        print(f"Error during conversion: {str(e)}")
+        raise  # Re-raise the exception to see full traceback

example_client.py ADDED Viewed

	@@ -0,0 +1,29 @@

+import requests
+import base64
+from PIL import Image
+import io
+def call_inpaint_api(image_path, mask_path, prompt):
+    # Update this with your actual space URL after deployment
+    url = "https://your-username-your-space-name.hf.space/inpaint"
+    files = {
+        'image': open(image_path, 'rb'),
+        'mask': open(mask_path, 'rb')
+    }
+    data = {
+        'prompt': prompt
+    }
+    response = requests.post(url, files=files, data=data)
+    if response.status_code == 200:
+        # Decode base64 image
+        img_data = base64.b64decode(response.json()['image'])
+        img = Image.open(io.BytesIO(img_data))
+        img.save('result.png')
+        return 'result.png'
+    else:
+        print(f"Error: {response.text}")
+        return None

generated_mask.png ADDED Viewed

generated_mask_1.png ADDED Viewed

inpainted_result.png ADDED Viewed

model_index.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "modelId": "realisticVisionV60B1",
+    "model_type": "stable-diffusion-inpainting",
+    "_class_name": "StableDiffusionInpaintPipeline",
+    "scheduler": ["DDIMScheduler", "EulerAncestralDiscreteScheduler", "DPMSolverMultistepScheduler"],
+    "torch_dtype": "float16"
+}

requirements.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+fastapi
+python-multipart
+torch
+diffusers
+transformers
+pillow
+uvicorn
+huggingface_hub
+python-jose[cryptography]
+passlib[bcrypt]
+python-dotenv

test.py ADDED Viewed

	@@ -0,0 +1,92 @@

+from diffusers import StableDiffusionInpaintPipeline
+import torch
+from PIL import Image
+import os
+def setup_model(model_path):
+    # Load the base pipeline
+    pipe = StableDiffusionInpaintPipeline.from_single_file(
+        model_path,
+        torch_dtype=torch.float16,
+        safety_checker=None
+    )
+    # Move to GPU if available
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    pipe = pipe.to(device)
+    # Enable memory optimizations
+    pipe.enable_attention_slicing()
+    return pipe
+def prepare_images(image_path, mask_path=None):
+    # Load and prepare the original image
+    original_image = Image.open(image_path)
+    # Resize to a multiple of 8 (required by Stable Diffusion)
+    width, height = (dim - dim % 8 for dim in original_image.size)
+    original_image = original_image.resize((width, height))
+    if mask_path:
+        # Load existing mask if provided
+        mask_image = Image.open(mask_path)
+        mask_image = mask_image.resize((width, height))
+        mask_image = mask_image.convert("L")
+    else:
+        # Create a simple rectangular mask in the center
+        mask_image = Image.new("L", (width, height), 0)
+        mask_width = width // 3
+        mask_height = height // 3
+        x1 = (width - mask_width) // 2
+        y1 = (height - mask_height) // 2
+        x2 = x1 + mask_width
+        y2 = y1 + mask_height
+        for y in range(y1, y2):
+            for x in range(x1, x2):
+                mask_image.putpixel((x, y), 255)
+    return original_image, mask_image
+def main():
+    # Setup paths using raw strings
+    model_path = "realisticVisionV60B1_v51VAE-inpainting.safetensors"
+    image_path = r"C:\Users\M. Y\Downloads\t2.png"
+    # First install accelerate if not already installed
+    try:
+        import accelerate
+    except ImportError:
+        print("Installing accelerate...")
+        os.system("pip install accelerate")
+    # Initialize model
+    print("Loading model...")
+    pipe = setup_model(model_path)
+    # Prepare images
+    print("Preparing images...")
+    original_image, mask_image = prepare_images(image_path)
+    # Save mask for verification
+    mask_image.save("generated_mask.png")
+    # Define your prompt
+    prompt = "a realistic photo of a beautiful garden"
+    negative_prompt = "blurry, low quality, distorted"
+    print("Performing inpainting...")
+    output_image = pipe(
+        prompt=prompt,
+        negative_prompt=negative_prompt,
+        image=original_image,
+        mask_image=mask_image,
+        num_inference_steps=30,
+        guidance_scale=7.5,
+    ).images[0]
+    # Save the result
+    output_image.save("inpainted_result.png")
+    print("Inpainting completed! Check 'inpainted_result.png' for the result.")
+if __name__ == "__main__":
+    main()

test2.py ADDED Viewed

	@@ -0,0 +1,101 @@

+from diffusers import StableDiffusionInpaintPipeline
+import torch
+from PIL import Image
+import os
+from torch.multiprocessing import set_start_method
+import gc
+def setup_model(model_path):
+    # Clear CUDA memory
+    torch.cuda.empty_cache()
+    gc.collect()
+    pipe = StableDiffusionInpaintPipeline.from_single_file(
+        model_path,
+        torch_dtype=torch.float16,
+        safety_checker=None
+    ).to("cuda")
+    # Enable memory optimizations without xformers
+    pipe.enable_attention_slicing(slice_size="max")
+    pipe.enable_sequential_cpu_offload()
+    return pipe
+def prepare_images(image_path, mask_path=None):
+    # Load and prepare the original image
+    original_image = Image.open(image_path)
+    # Resize to a multiple of 8 (required by Stable Diffusion)
+    width, height = (dim - dim % 8 for dim in original_image.size)
+    original_image = original_image.resize((width, height))
+    if mask_path:
+        mask_image = Image.open(mask_path)
+        mask_image = mask_image.resize((width, height))
+        mask_image = mask_image.convert("L")
+    else:
+        # Create a simple rectangular mask in the center
+        mask_image = Image.new("L", (width, height), 0)
+        mask_width = width // 3
+        mask_height = height // 3
+        x1 = (width - mask_width) // 2
+        y1 = (height - mask_height) // 2
+        x2 = x1 + mask_width
+        y2 = y1 + mask_height
+        for y in range(y1, y2):
+            for x in range(x1, x2):
+                mask_image.putpixel((x, y), 255)
+    return original_image, mask_image
+def main():
+    # Setup paths using raw strings
+    model_path = "realisticVisionV60B1_v51VAE-inpainting.safetensors"
+    image_path = r"C:\Users\M. Y\Downloads\t2.png"
+    print(f"CUDA available: {torch.cuda.is_available()}")
+    if torch.cuda.is_available():
+        print(f"GPU: {torch.cuda.get_device_name()}")
+        print(f"Memory allocated: {torch.cuda.memory_allocated()/1024**2:.2f}MB")
+    # Initialize model
+    print("Loading model...")
+    pipe = setup_model(model_path)
+    # Prepare images
+    print("Preparing images...")
+    original_image, mask_image = prepare_images(image_path)
+    # Save mask for verification
+    mask_image.save("generated_mask.png")
+    mask_image_1 = Image.open("generated_mask_1.png")
+    # Define your prompt
+    prompt = "add some flowers and a fountain"
+    negative_prompt = "blurry, low quality, distorted"
+    print("Performing inpainting...")
+    with torch.cuda.amp.autocast():  # Use automatic mixed precision
+        output_image = pipe(
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            image=original_image,
+            mask_image=mask_image_1,
+            num_inference_steps=20,  # Reduced steps for faster generation
+            guidance_scale=7.5,
+        ).images[0]
+    # Save the result
+    output_image.save("inpainted_result.png")
+    print("Inpainting completed! Check 'inpainted_result.png' for the result.")
+    # Clean up
+    torch.cuda.empty_cache()
+    gc.collect()
+if __name__ == "__main__":
+    try:
+        set_start_method('spawn')
+    except RuntimeError:
+        pass
+    main()

test_app.py ADDED Viewed

	@@ -0,0 +1,10 @@

+from fastapi.testclient import TestClient
+from app import app
+import pytest
+client = TestClient(app)
+def test_health_check():
+    response = client.get("/health")
+    assert response.status_code == 200
+    assert "status" in response.json()