Spaces:

Himanshu806
/

finalProduct

Running on Zero

App Files Files Community

Himanshu-AT committed on Feb 2

Commit

61dc46d

1 Parent(s): 71f7331

update ui, add download button + set inpaint

Browse files

Files changed (1) hide show

app.py +88 -28

app.py CHANGED Viewed

@@ -37,16 +37,54 @@ for model_name, model_path in lora_models.items():
 lora_models["None"] = None
 @spaces.GPU(durations=300)
-def infer(edit_images, prompt, width, height, lora_model, seed=42, randomize_seed=False, guidance_scale=3.5, num_inference_steps=28, progress=gr.Progress(track_tqdm=True)):
     # pipe.enable_xformers_memory_efficient_attention()
     if lora_model != "None":
         pipe.load_lora_weights(lora_models[lora_model])
         pipe.enable_lora()
     image = edit_images["background"]
-    # width, height = calculate_optimal_dimensions(image)
     mask = edit_images["layers"][0]
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
@@ -72,6 +110,13 @@ def infer(edit_images, prompt, width, height, lora_model, seed=42, randomize_see
     return output_image_jpg, seed
     # return image, seed
 examples = [
     "photography of a young woman,  accent lighting,  (front view:1.4),  "
     # "a tiny astronaut hatching from an egg on the moon",
@@ -150,31 +195,46 @@ with gr.Blocks(css=css) as demo:
                     value=28,
                 )
-            with gr.Row():
-                width = gr.Slider(
-                    label="width",
-                    minimum=512,
-                    maximum=3072,
-                    step=1,
-                    value=1024,
-                )
-                height = gr.Slider(
-                    label="height",
-                    minimum=512,
-                    maximum=3072,
-                    step=1,
-                    value=1024,
-                )
     gr.on(
         triggers=[run_button.click, prompt.submit],
         fn = infer,
-        inputs = [edit_image, prompt, width, height, lora_model, seed, randomize_seed, guidance_scale, num_inference_steps],
         outputs = [result, seed]
     )
 # demo.launch()
 PASSWORD = os.getenv("GRADIO_PASSWORD")
 USERNAME = os.getenv("GRADIO_USERNAME")
@@ -262,12 +322,12 @@ demo.launch(auth=authenticate)
 #     The mask_prompt is expected to be a comma-separated string of two integers,
 #     e.g. "450,600" representing an (x,y) coordinate in the image.
 #     The function converts the coordinate into the proper input format for SAM and returns a binary mask.
 #     """
 #     if mask_prompt.strip() == "":
 #         raise ValueError("No mask prompt provided.")
 #     try:
 #         # Parse the mask_prompt into a coordinate
 #         coords = [int(x.strip()) for x in mask_prompt.split(",")]
@@ -275,33 +335,33 @@ demo.launch(auth=authenticate)
 #             raise ValueError("Expected two comma-separated integers (x,y).")
 #     except Exception as e:
 #         raise ValueError("Invalid mask prompt. Please provide coordinates as 'x,y'. Error: " + str(e))
 #     # The SAM processor expects a list of input points.
 #     # Format the point as a list of lists; here we assume one point per image.
 #     # (The Transformers SAM expects the points in [x, y] order.)
 #     input_points = [coords]  # e.g. [[450,600]]
 #     # Optionally, you can supply input_labels (1 for foreground, 0 for background)
 #     input_labels = [1]
 #     # Prepare the inputs for the SAM processor.
 #     inputs = sam_processor(images=image,
 #                            input_points=[input_points],
 #                            input_labels=[input_labels],
 #                            return_tensors="pt")
 #     # Move tensors to the same device as the model.
 #     device = next(sam_model.parameters()).device
 #     inputs = {k: v.to(device) for k, v in inputs.items()}
 #     # Forward pass through SAM.
 #     with torch.no_grad():
 #         outputs = sam_model(**inputs)
 #     # The output contains predicted masks; we take the first mask from the first prompt.
 #     # (Assuming outputs.pred_masks is of shape (batch_size, num_masks, H, W))
 #     pred_masks = outputs.pred_masks  # Tensor of shape (1, num_masks, H, W)
 #     mask = pred_masks[0][0].detach().cpu().numpy()
 #     # Convert the mask to binary (0 or 255) using a threshold.
 #     mask_bin = (mask > 0.5).astype(np.uint8) * 255
 #     mask_pil = Image.fromarray(mask_bin)
@@ -387,14 +447,14 @@ demo.launch(auth=authenticate)
 #                 mask_preview = gr.Image(label="Mask Preview", show_label=True)
 #                 run_button = gr.Button("Run")
 #             result = gr.Image(label="Result", show_label=False)
 #         # Button to preview the generated mask.
 #         def on_generate_mask(image, mask_prompt):
 #             if image is None or mask_prompt.strip() == "":
 #                 return None
 #             mask = generate_mask_with_sam(image, mask_prompt)
 #             return mask
 #         generate_mask_btn.click(
 #             fn=on_generate_mask,
 #             inputs=[edit_image, mask_prompt],

 lora_models["None"] = None
def calculate_optimal_dimensions(image: "Image.Image"):
    """Return a (width, height) pair that follows the input's aspect ratio.

    The longer side is fixed at 1024 pixels and the shorter side is scaled
    from the image's aspect ratio, after that ratio has been clamped to the
    range [9/16, 16/9]. Both values are rounded down to multiples of 8, so
    the shorter side is never smaller than 576.

    Args:
        image: Object exposing ``size`` as ``(width, height)``, such as a
            PIL image.

    Returns:
        Tuple ``(width, height)`` of ints, each a multiple of 8.

    Raises:
        ValueError: If either input dimension is not positive.
    """
    MIN_ASPECT_RATIO = 9 / 16
    MAX_ASPECT_RATIO = 16 / 9
    FIXED_DIMENSION = 1024

    original_width, original_height = image.size
    if original_width <= 0 or original_height <= 0:
        raise ValueError(
            f"Image dimensions must be positive, got {original_width}x{original_height}."
        )

    # Clamp the ratio *before* scaling. Correcting it after the multiple-of-8
    # floor lets the shorter side of a very wide or very tall image collapse
    # to 0 (division by zero) and turns the result into floats.
    aspect_ratio = original_width / original_height
    aspect_ratio = min(max(aspect_ratio, MIN_ASPECT_RATIO), MAX_ASPECT_RATIO)

    # Fix the longer side and derive the shorter one from the clamped ratio.
    if aspect_ratio > 1:  # Wider than tall
        width = FIXED_DIMENSION
        height = round(FIXED_DIMENSION / aspect_ratio)
    else:  # Square, or taller than wide
        height = FIXED_DIMENSION
        width = round(FIXED_DIMENSION * aspect_ratio)

    # Round down to multiples of 8; integer arithmetic keeps the results ints.
    width = (width // 8) * 8
    height = (height // 8) * 8
    return width, height
 @spaces.GPU(durations=300)
+def infer(edit_images, prompt, lora_model, seed=42, randomize_seed=False, guidance_scale=3.5, num_inference_steps=28, progress=gr.Progress(track_tqdm=True)):
     # pipe.enable_xformers_memory_efficient_attention()
     if lora_model != "None":
         pipe.load_lora_weights(lora_models[lora_model])
         pipe.enable_lora()
     image = edit_images["background"]
+    width, height = calculate_optimal_dimensions(image)
     mask = edit_images["layers"][0]
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
     return output_image_jpg, seed
     # return image, seed
def download_image(image):
    """Save *image* as a PNG file and return the path of that file.

    Every call writes to a newly created temporary file, so simultaneous
    requests cannot overwrite each other's output.

    Args:
        image: Object with a PIL-style ``save(path, format)`` method, or
            ``None`` when there is no result yet.

    Returns:
        Path of the written PNG file, or ``None`` when *image* is ``None``.
    """
    import os
    import tempfile

    if image is None:
        return None

    # mkstemp creates the file atomically with a unique name; only the path
    # is needed here, so the descriptor is closed before saving.
    fd, path = tempfile.mkstemp(prefix="output_", suffix=".png")
    os.close(fd)
    image.save(path, "PNG")
    return path


def set_image_as_inpaint(image):
    """Return *image* unchanged so it can be routed to another component."""
    return image
 examples = [
     "photography of a young woman,  accent lighting,  (front view:1.4),  "
     # "a tiny astronaut hatching from an egg on the moon",
                     value=28,
                 )
+            # with gr.Row():
+            #     width = gr.Slider(
+            #         label="width",
+            #         minimum=512,
+            #         maximum=3072,
+            #         step=1,
+            #         value=1024,
+            #     )
+            #     height = gr.Slider(
+            #         label="height",
+            #         minimum=512,
+            #         maximum=3072,
+            #         step=1,
+            #         value=1024,
+            #     )
     gr.on(
         triggers=[run_button.click, prompt.submit],
         fn = infer,
+        inputs = [edit_image, prompt, lora_model, seed, randomize_seed, guidance_scale, num_inference_steps],
         outputs = [result, seed]
     )
+    download_button = gr.Button("Download Image as PNG")
+    set_inpaint_button = gr.Button("Set Image as Inpaint")
+    download_button.click(
+            fn=download_image,
+            inputs=[result],
+            outputs=gr.File(label="Download Image")
+        )
+    set_inpaint_button.click(
+            fn=set_image_as_inpaint,
+            inputs=[result],
+            outputs=[edit_image]
+    )
 # demo.launch()
 PASSWORD = os.getenv("GRADIO_PASSWORD")
 USERNAME = os.getenv("GRADIO_USERNAME")
 #     The mask_prompt is expected to be a comma-separated string of two integers,
 #     e.g. "450,600" representing an (x,y) coordinate in the image.
 #     The function converts the coordinate into the proper input format for SAM and returns a binary mask.
 #     """
 #     if mask_prompt.strip() == "":
 #         raise ValueError("No mask prompt provided.")
 #     try:
 #         # Parse the mask_prompt into a coordinate
 #         coords = [int(x.strip()) for x in mask_prompt.split(",")]
 #             raise ValueError("Expected two comma-separated integers (x,y).")
 #     except Exception as e:
 #         raise ValueError("Invalid mask prompt. Please provide coordinates as 'x,y'. Error: " + str(e))
 #     # The SAM processor expects a list of input points.
 #     # Format the point as a list of lists; here we assume one point per image.
 #     # (The Transformers SAM expects the points in [x, y] order.)
 #     input_points = [coords]  # e.g. [[450,600]]
 #     # Optionally, you can supply input_labels (1 for foreground, 0 for background)
 #     input_labels = [1]
 #     # Prepare the inputs for the SAM processor.
 #     inputs = sam_processor(images=image,
 #                            input_points=[input_points],
 #                            input_labels=[input_labels],
 #                            return_tensors="pt")
 #     # Move tensors to the same device as the model.
 #     device = next(sam_model.parameters()).device
 #     inputs = {k: v.to(device) for k, v in inputs.items()}
 #     # Forward pass through SAM.
 #     with torch.no_grad():
 #         outputs = sam_model(**inputs)
 #     # The output contains predicted masks; we take the first mask from the first prompt.
 #     # (Assuming outputs.pred_masks is of shape (batch_size, num_masks, H, W))
 #     pred_masks = outputs.pred_masks  # Tensor of shape (1, num_masks, H, W)
 #     mask = pred_masks[0][0].detach().cpu().numpy()
 #     # Convert the mask to binary (0 or 255) using a threshold.
 #     mask_bin = (mask > 0.5).astype(np.uint8) * 255
 #     mask_pil = Image.fromarray(mask_bin)
 #                 mask_preview = gr.Image(label="Mask Preview", show_label=True)
 #                 run_button = gr.Button("Run")
 #             result = gr.Image(label="Result", show_label=False)
 #         # Button to preview the generated mask.
 #         def on_generate_mask(image, mask_prompt):
 #             if image is None or mask_prompt.strip() == "":
 #                 return None
 #             mask = generate_mask_with_sam(image, mask_prompt)
 #             return mask
 #         generate_mask_btn.click(
 #             fn=on_generate_mask,
 #             inputs=[edit_image, mask_prompt],