import os
import tempfile
from typing import Any
import torch
import numpy as np
from PIL import Image
import gradio as gr
import trimesh
from transparent_background import Remover
from diffusers import DiffusionPipeline

# Import and set up SPAR3D.
# At import time, pip-install the bundled ./texture_baker and ./uv_unwrapper
# packages (with USE_CUDA=1 in the environment) before importing spar3d below.
# NOTE(review): presumably spar3d relies on these two packages - confirm.
# NOTE(review): the exit status of the install command is not checked, so a
# failed build only surfaces later (if at all) when the packages are used.
os.system("USE_CUDA=1 pip install -vv --no-build-isolation ./texture_baker ./uv_unwrapper")
import spar3d.utils as spar3d_utils
from spar3d.system import SPAR3D

# Constants
# Width/height (pixels) of the conditioning image: used as the resize target in
# create_batch, the crop size in generate_and_process_3d, and for the intrinsics.
COND_WIDTH = 512
COND_HEIGHT = 512
# Passed to spar3d_utils.default_cond_c2w to build the conditioning camera pose.
# NOTE(review): presumably the camera-to-object distance - confirm units.
COND_DISTANCE = 2.2
# Field of view in radians, passed to create_intrinsic_from_fov_rad.
# NOTE(review): the name suggests the vertical FOV - confirm.
COND_FOVY = 0.591627
# RGB colour that create_batch blends in wherever the mask is 0.
BACKGROUND_COLOR = [0.5, 0.5, 0.5]

# Initialize models
# Everything below runs once at import time and is shared by all requests.
# The device chosen here is reused for both models and for the input batch.
device = spar3d_utils.get_device()
# Background remover applied to the generated image before 3D reconstruction.
bg_remover = Remover()
# SPAR3D image-to-mesh model, switched to eval mode and moved to the device.
spar3d_model = SPAR3D.from_pretrained(
    "stabilityai/stable-point-aware-3d",
    config_name="config.yaml",
    weight_name="model.safetensors"
).eval().to(device)

# Initialize FLUX model
# Text-to-image pipeline, loaded with bfloat16 weights and moved to the device.
dtype = torch.bfloat16
flux_pipe = DiffusionPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell", 
    torch_dtype=dtype
).to(device)

# Initialize camera parameters
# Fixed conditioning camera pose and intrinsics; create_batch adds a leading
# batch dimension to each of these for every request.
c2w_cond = spar3d_utils.default_cond_c2w(COND_DISTANCE)
intrinsic, intrinsic_normed_cond = spar3d_utils.create_intrinsic_from_fov_rad(
    COND_FOVY, COND_HEIGHT, COND_WIDTH
)

def create_batch(input_image: Image.Image) -> dict[str, Any]:
    """Build the single-sample conditioning batch for SPAR3D.

    The image is resized to ``COND_WIDTH`` x ``COND_HEIGHT``, its alpha
    channel becomes the foreground mask, and its colour channels are blended
    onto ``BACKGROUND_COLOR`` using that mask.

    Args:
        input_image: PIL image of the object. Images that are not already
            RGBA are converted, so an image without alpha is treated as
            fully opaque.

    Returns:
        A dict of CPU tensors, each with a leading batch dimension of 1:
        ``rgb_cond`` (1, H, W, 3), ``mask_cond`` (1, H, W, 1), plus the
        module-level camera tensors ``c2w_cond``, ``intrinsic_cond`` and
        ``intrinsic_normed_cond``.
    """
    # The mask is read from the last channel, so make sure that channel really
    # is alpha; otherwise an RGB input would use its blue channel as the mask.
    if input_image.mode != "RGBA":
        input_image = input_image.convert("RGBA")

    resized = input_image.resize((COND_WIDTH, COND_HEIGHT))
    # 8-bit channels divided by 255 are already float32 values in [0, 1].
    img_cond = torch.from_numpy(np.asarray(resized).astype(np.float32) / 255.0)

    mask_cond = img_cond[:, :, -1:]
    # Composite onto the flat background: mask 0 -> background, mask 1 -> image.
    background = torch.tensor(BACKGROUND_COLOR)[None, None, :]
    rgb_cond = torch.lerp(background, img_cond[:, :, :3], mask_cond)

    return {
        "rgb_cond": rgb_cond.unsqueeze(0),
        "mask_cond": mask_cond.unsqueeze(0),
        "c2w_cond": c2w_cond.unsqueeze(0),
        "intrinsic_cond": intrinsic.unsqueeze(0),
        "intrinsic_normed_cond": intrinsic_normed_cond.unsqueeze(0),
    }

def generate_and_process_3d(
    prompt: str, seed: int = 42, width: int = 1024, height: int = 1024
) -> "tuple[str, Image.Image]":
    """Generate an image from a text prompt and reconstruct it as a 3D mesh.

    Pipeline: FLUX text-to-image -> background removal -> foreground crop ->
    SPAR3D mesh generation -> GLB export to a temporary file.

    Args:
        prompt: Text description of the object to generate.
        seed: Seed for the image generator. Coerced to ``int`` because UI
            sliders may deliver floats.
        width: Width in pixels of the generated image (coerced to ``int``).
        height: Height in pixels of the generated image (coerced to ``int``).

    Returns:
        ``(glb_path, generated_image)``: the path of the exported ``.glb``
        file and the image produced by FLUX. The temporary file is not
        deleted by this function.

    Raises:
        gr.Error: If any stage fails. The original exception is chained so
            the traceback is kept, and the message is shown in the UI instead
            of being handed to the file output as if it were a path.
    """
    try:
        # torch.Generator.manual_seed and the pipeline size arguments need ints.
        seed, width, height = int(seed), int(width), int(height)

        # Generate image using FLUX
        generator = torch.Generator().manual_seed(seed)
        generated_image = flux_pipe(
            prompt=prompt,
            width=width,
            height=height,
            num_inference_steps=4,
            generator=generator,
            guidance_scale=0.0,
        ).images[0]

        # Always remove the background; the remover is given an RGB image.
        # NOTE(review): create_batch reads the last channel as the mask, so the
        # result is expected to carry an alpha channel - confirm for Remover.
        input_image = bg_remover.process(generated_image.convert("RGB"))

        # Auto crop around the foreground and resize to the conditioning size
        input_image = spar3d_utils.foreground_crop(
            input_image,
            crop_ratio=1.3,
            newsize=(COND_WIDTH, COND_HEIGHT),
            no_crop=False,
        )

        # Prepare batch
        batch = create_batch(input_image)
        batch = {k: v.to(device) for k, v in batch.items()}

        # Generate mesh
        with torch.no_grad():
            with torch.autocast(device_type=device, dtype=torch.bfloat16):
                meshes, _ = spar3d_model.generate_mesh(
                    batch,
                    1024,  # texture_resolution
                    remesh="none",
                    vertex_count=-1,
                    estimate_illumination=True,
                )
        # The batch holds a single sample, so take the first result.
        mesh = meshes[0]

        # Export to GLB. Reserve a unique path and close the handle first so no
        # file descriptor is leaked and the exporter can reopen the file.
        with tempfile.NamedTemporaryFile(suffix=".glb", delete=False) as temp_file:
            glb_path = temp_file.name
        mesh.export(glb_path, file_type="glb", include_normals=True)

        return glb_path, generated_image

    except Exception as e:
        # Surface the failure in the Gradio UI rather than returning the error
        # text as the value of the file output.
        raise gr.Error(str(e)) from e

# Create Gradio interface
# Example inputs shown under the form. The interface has four input
# components (prompt, seed, width, height), and a flat list of examples is only
# accepted for a single input component, so each example is a full row that
# pairs a prompt with the default seed and image size.
_EXAMPLE_PROMPTS = [
    "a tiny astronaut hatching from an egg on the moon",
    "a cat holding a sign that says hello world",
    "an anime illustration of a wiener schnitzel",
]
examples = [[example_prompt, 42, 1024, 1024] for example_prompt in _EXAMPLE_PROMPTS]

# Input widgets, built in the positional order that generate_and_process_3d
# takes its arguments: prompt, seed, width, height.
_prompt_input = gr.Text(
    label="Enter your prompt",
    placeholder="Describe what you want to generate...",
)
_seed_input = gr.Slider(
    label="Seed", minimum=0, maximum=np.iinfo(np.int32).max, step=1, value=42
)
_width_input = gr.Slider(
    label="Width", minimum=256, maximum=2048, step=32, value=1024
)
_height_input = gr.Slider(
    label="Height", minimum=256, maximum=2048, step=32, value=1024
)

# Output widgets, matching the two values the function returns: the GLB file
# path and the generated image.
_glb_output = gr.File(label="Download GLB", file_types=[".glb"])
_image_output = gr.Image(label="Generated Image", type="pil")

# Assemble the web UI around the text-to-3D function.
demo = gr.Interface(
    fn=generate_and_process_3d,
    inputs=[_prompt_input, _seed_input, _width_input, _height_input],
    outputs=[_glb_output, _image_output],
    title="Text to 3D Model Generator",
    description="Enter a text prompt to generate an image that will be converted into a 3D model",
    examples=examples,
)

# Start the server only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()