Himanshu-AT committed
Commit 4352a3f · Parent(s): c6696f9

Add gemini.py for generative AI styling prompt generation

Files changed (3)
  1. app.py +126 -52
  2. gemini.py +67 -0
  3. segment-anything +1 -0
app.py CHANGED
@@ -1,25 +1,22 @@
 import gradio as gr
 import numpy as np
-
 import spaces
 import torch
-import spaces
 import random
-
-from diffusers import FluxFillPipeline
 from PIL import Image
+import cv2
 
+# ------------------ Inpainting Pipeline Setup ------------------ #
+from diffusers import FluxFillPipeline
 
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 2048
 
-pipe = FluxFillPipeline.from_pretrained("black-forest-labs/FLUX.1-Fill-dev", torch_dtype=torch.bfloat16).to("cuda")
+pipe = FluxFillPipeline.from_pretrained(
+    "black-forest-labs/FLUX.1-Fill-dev", torch_dtype=torch.bfloat16
+).to("cuda")
 pipe.load_lora_weights("alvdansen/flux-koda")
-# pipe.enable_sequential_cpu_offload()
-# pipe.enable_fp16()
 pipe.enable_lora()
-# pipe.vae.enable_slicing()
-# pipe.vae.enable_tiling()
 
 def calculate_optimal_dimensions(image: Image.Image):
     # Extract the original dimensions
@@ -52,22 +49,99 @@ def calculate_optimal_dimensions(image: Image.Image):
     elif calculated_aspect_ratio < MIN_ASPECT_RATIO:
         height = (width / MIN_ASPECT_RATIO // 8) * 8
 
-    # Ensure width and height remain above the minimum dimensions
+    # Ensure minimum dimensions are met
    width = max(width, 576) if width == FIXED_DIMENSION else width
     height = max(height, 576) if height == FIXED_DIMENSION else height
 
     return width, height
 
-@spaces.GPU(durations=300)
-def infer(edit_images, prompt, seed=42, randomize_seed=False, width=1024, height=1024, guidance_scale=3.5, num_inference_steps=28, progress=gr.Progress(track_tqdm=True)):
-    # pipe.enable_xformers_memory_efficient_attention()
-
+# ------------------ SAM (Transformers) Imports and Initialization ------------------ #
+from transformers import SamModel, SamProcessor
+
+# Load the model and processor from Hugging Face.
+sam_model = SamModel.from_pretrained("facebook/sam-vit-base")
+sam_processor = SamProcessor.from_pretrained("facebook/sam-vit-base")
+# (The model runs on CPU by default; if you have a CUDA device, send it to "cuda".)
+sam_model.to("cuda" if torch.cuda.is_available() else "cpu")
+
+def generate_mask_with_sam(image: Image.Image, mask_prompt: str):
+    """
+    Generate a segmentation mask using SAM (via Hugging Face Transformers).
+
+    The mask_prompt is expected to be a comma-separated string of two integers,
+    e.g. "450,600", representing an (x, y) coordinate in the image.
+
+    The function converts the coordinate into the proper input format for SAM and returns a binary mask.
+    """
+    if mask_prompt.strip() == "":
+        raise ValueError("No mask prompt provided.")
+
+    try:
+        # Parse the mask_prompt into a coordinate.
+        coords = [int(x.strip()) for x in mask_prompt.split(",")]
+        if len(coords) != 2:
+            raise ValueError("Expected two comma-separated integers (x,y).")
+    except Exception as e:
+        raise ValueError("Invalid mask prompt. Please provide coordinates as 'x,y'. Error: " + str(e))
+
+    # The SAM processor expects a list of input points per image.
+    # Format the point as a list of lists; here we assume one point per image.
+    # (The Transformers SAM expects the points in [x, y] order.)
+    input_points = [coords]  # e.g. [[450, 600]]
+    # Optionally, you can supply input_labels (1 for foreground, 0 for background).
+    input_labels = [1]
+
+    # Prepare the inputs for the SAM processor.
+    inputs = sam_processor(images=image,
+                           input_points=[input_points],
+                           input_labels=[input_labels],
+                           return_tensors="pt")
+
+    # Move tensors to the same device as the model.
+    device = next(sam_model.parameters()).device
+    inputs = {k: v.to(device) for k, v in inputs.items()}
+
+    # Forward pass through SAM.
+    with torch.no_grad():
+        outputs = sam_model(**inputs)
+
+    # outputs.pred_masks has shape (batch_size, point_batch_size, num_masks, H, W);
+    # take the first predicted mask for the first point.
+    pred_masks = outputs.pred_masks
+    mask = pred_masks[0, 0, 0].detach().cpu().numpy()
+
+    # Convert the mask to binary (0 or 255) using a threshold.
+    mask_bin = (mask > 0.5).astype(np.uint8) * 255
+    mask_pil = Image.fromarray(mask_bin)
+    return mask_pil
+
+# ------------------ Inference Function ------------------ #
+@spaces.GPU(duration=300)
+def infer(edit_images, prompt, mask_prompt,
+          seed=42, randomize_seed=False, width=1024, height=1024,
+          guidance_scale=3.5, num_inference_steps=28, progress=gr.Progress(track_tqdm=True)):
+    # Get the base image from the "background" layer.
     image = edit_images["background"]
     width, height = calculate_optimal_dimensions(image)
-    mask = edit_images["layers"][0]
+
+    # If a mask prompt is provided, use the SAM-based mask generator.
+    if mask_prompt and mask_prompt.strip() != "":
+        try:
+            mask = generate_mask_with_sam(image, mask_prompt)
+        except Exception as e:
+            raise ValueError("Error generating mask from prompt: " + str(e))
+    else:
+        # Fall back to using a manually drawn mask (from the first layer).
+        try:
+            mask = edit_images["layers"][0]
+        except (TypeError, IndexError):
+            raise ValueError("No mask provided. Please either draw a mask or supply a mask prompt.")
+
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
-    image = pipe(
+
+    # Run the inpainting diffusion pipeline with the provided prompt and mask.
+    image_out = pipe(
         prompt=prompt,
         image=image,
         mask_image=mask,
@@ -76,23 +150,14 @@ def infer(...):
         guidance_scale=guidance_scale,
         num_inference_steps=num_inference_steps,
         generator=torch.Generator(device='cuda').manual_seed(seed),
-        # lora_scale=0.75 // not supported in this version
     ).images[0]
 
-    output_image_jpg = image.convert("RGB")
+    output_image_jpg = image_out.convert("RGB")
     output_image_jpg.save("output.jpg", "JPEG")
-
     return output_image_jpg, seed
-    # return image, seed
 
-examples = [
-    "photography of a young woman, accent lighting, (front view:1.4), "
-    # "a tiny astronaut hatching from an egg on the moon",
-    # "a cat holding a sign that says hello world",
-    # "an anime illustration of a wiener schnitzel",
-]
-
-css="""
+# ------------------ Gradio UI ------------------ #
+css = """
 #col-container {
     margin: 0 auto;
     max-width: 1000px;
@@ -100,34 +165,51 @@ css = """
 """
 
 with gr.Blocks(css=css) as demo:
-
     with gr.Column(elem_id="col-container"):
-        gr.Markdown(f"""# FLUX.1 [dev]
-        """)
+        gr.Markdown("# FLUX.1 [dev] with SAM (Transformers) Mask Generation")
         with gr.Row():
             with gr.Column():
+                # The image editor now allows you to optionally draw a mask.
                 edit_image = gr.ImageEditor(
-                    label='Upload and draw mask for inpainting',
+                    label='Upload Image (and optionally draw a mask)',
                     type='pil',
                     sources=["upload", "webcam"],
                     image_mode='RGB',
-                    layers=False,
+                    layers=False,  # We will generate a mask automatically if needed.
                     brush=gr.Brush(colors=["#FFFFFF"]),
-                    # height=600
                 )
                 prompt = gr.Text(
-                    label="Prompt",
+                    label="Inpainting Prompt",
                     show_label=False,
                     max_lines=2,
-                    placeholder="Enter your prompt",
+                    placeholder="Enter your inpainting prompt",
                     container=False,
                 )
+                mask_prompt = gr.Text(
+                    label="Mask Prompt (enter a coordinate as 'x,y')",
+                    show_label=True,
+                    placeholder="E.g. 450,600",
+                    container=True,
+                )
+                generate_mask_btn = gr.Button("Generate Mask")
+                mask_preview = gr.Image(label="Mask Preview", show_label=True)
                 run_button = gr.Button("Run")
-
             result = gr.Image(label="Result", show_label=False)
+
+            # Button to preview the generated mask.
+            def on_generate_mask(image, mask_prompt):
+                if image is None or mask_prompt.strip() == "":
+                    return None
+                mask = generate_mask_with_sam(image, mask_prompt)
+                return mask
+
+            generate_mask_btn.click(
+                fn=on_generate_mask,
+                inputs=[edit_image, mask_prompt],
+                outputs=[mask_preview]
+            )
 
         with gr.Accordion("Advanced Settings", open=False):
-
             seed = gr.Slider(
                 label="Seed",
                 minimum=0,
@@ -135,11 +217,8 @@ with gr.Blocks(css=css) as demo:
                 step=1,
                 value=0,
             )
-
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-
            with gr.Row():
-
                width = gr.Slider(
                    label="Width",
                    minimum=256,
@@ -148,7 +227,6 @@ with gr.Blocks(css=css) as demo:
                    value=1024,
                    visible=False
                )
-
                height = gr.Slider(
                    label="Height",
                    minimum=256,
@@ -157,19 +235,16 @@ with gr.Blocks(css=css) as demo:
                    value=1024,
                    visible=False
                )
-
            with gr.Row():
-
                guidance_scale = gr.Slider(
                    label="Guidance Scale",
                    minimum=1,
                    maximum=30,
                    step=0.5,
-                    value=50,
+                    value=3.5,
                )
-
                num_inference_steps = gr.Slider(
-                    label="Number of inference steps",
+                    label="Number of Inference Steps",
                    minimum=1,
                    maximum=50,
                    step=1,
@@ -178,9 +253,9 @@ with gr.Blocks(css=css) as demo:
 
    gr.on(
        triggers=[run_button.click, prompt.submit],
-        fn = infer,
-        inputs = [edit_image, prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
-        outputs = [result, seed]
+        fn=infer,
+        inputs=[edit_image, prompt, mask_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
+        outputs=[result, seed]
    )
 
-demo.launch()
+demo.launch()
 
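For reference, a minimal sketch of how the commit's SAM-based mask path can be exercised outside the Gradio UI. It assumes generate_mask_with_sam is imported from app.py (importing app.py also loads the FLUX pipeline, so a GPU is needed); the file name photo.jpg and the sample coordinate are placeholders, not part of the commit:

    # Hypothetical standalone usage of the commit's mask generator.
    from PIL import Image
    from app import generate_mask_with_sam

    image = Image.open("photo.jpg").convert("RGB")   # any RGB input image
    mask = generate_mask_with_sam(image, "450,600")  # (x, y) point on the subject
    mask.save("mask.png")                            # binary mask; white = region to inpaint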
gemini.py ADDED
@@ -0,0 +1,65 @@
+import google.generativeai as genai
+import os
+
+# Read the API key from the environment rather than hardcoding it.
+api_key = os.getenv("GEMINI_API_KEY")
+if not api_key:
+    raise ValueError("API key not found")
+
+genai.configure(api_key=api_key)
+
+model = genai.GenerativeModel("gemini-1.5-flash")
+
+prompt1 = """A young Indian office lady, standing in front of a pot with exotic plants inside an office"""
+
+system_prompt = f"""\
+<SYSTEM_PROMPT>
+Act as a professional stylist and generate highly detailed styling prompt suggestions based on a given look or style preference.
+The suggestions should be highly detailed, similar to how a professional stylist would describe a look. Use EXAMPLE_INPUT and EXAMPLE_OUTPUT \
+as a reference to understand exactly how you have to describe faces.
+
+Input: A general prompt describing the look or style preference.
+
+Output: A well-structured, detailed styling guide based on face shape, eyes, lips, hair, skin tone, and body type.
+
+Step 1: Extract Core Elements
+- Face Shape: Oval, Round, Square, Heart, Diamond, etc.
+- Eye Shape & Color: Almond, Hooded, Monolid, Deep-set; Brown, Blue, Green, etc.
+- Lips: Thin, Full, Defined Cupid’s bow, etc.
+- Hair Type & Length: Straight, Wavy, Curly, Coily; Short, Medium, Long.
+- Skin Tone: Fair, Medium, Olive, Dark, etc.
+- Body Type: Petite, Tall, Athletic, Curvy, etc.
+- Occasion: Casual, Formal, Streetwear, Vintage, Business, Party, etc.
+
+Step 2: Generate a Detailed Styling Prompt
+Given the extracted details, the generator will create a tailored styling suggestion.
+</SYSTEM_PROMPT>
+
+<EXAMPLE_INPUT>
+A young Indian girl standing in front of a rock wall with visible large rocks
+</EXAMPLE_INPUT>
+
+✨ Generated Styling Prompt:
+<EXAMPLE_OUTPUT>
+A young Indian girl with warm brown skin and expressive almond-shaped eyes stands
+gracefully in front of a textured rock wall with large, visible stones. Her long,
+wavy black hair cascades over her shoulders, catching the soft sunlight. She
+wears a flowing, earth-toned bohemian dress that complements the rugged background,
+with delicate golden jewelry adding a subtle elegance. Her full lips curve into a
+serene smile as she gazes into the distance, embodying a harmonious blend of strength
+and grace.
+</EXAMPLE_OUTPUT>
+
+<RULES>
+- You should only return the prompt and nothing else.
+- You should not return the system prompt.
+- You should not return any other details from the prompt.
+</RULES>
+
+<NORMAL_PROMPT>
+{prompt1}
+</NORMAL_PROMPT>
+"""
+
+response = model.generate_content(system_prompt)
+print(response.text)
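As committed, gemini.py runs as a one-shot script against the fixed prompt1. A small sketch of how the same template could be reused for arbitrary inputs if appended to gemini.py; the helper name generate_styling_prompt is hypothetical, not part of the commit:

    # Hypothetical wrapper: reuse gemini.py's template for any user prompt.
    def generate_styling_prompt(user_prompt: str) -> str:
        # system_prompt is already an evaluated f-string, so swap the
        # baked-in prompt1 text for the caller's prompt.
        filled = system_prompt.replace(prompt1, user_prompt)
        return model.generate_content(filled).text

    print(generate_styling_prompt("A man in a navy suit outside a glass office tower"))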
segment-anything ADDED
@@ -0,0 +1 @@
+Subproject commit dca509fe793f601edb92606367a655c15ac00fdf
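Note that segment-anything is tracked as a git submodule pinned to the commit above, so a plain clone leaves the directory empty; after cloning, fetch it with:

    git submodule update --init segment-anything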