update v0.2
Files changed:

- app.py +81 -20
- assets/ref1.jpg +0 -0
- assets/ref2.jpg +0 -0
- assets/ref3.jpg +0 -0
- assets/ref4.jpg +0 -0
- assets/ref_cat.jpg +0 -0
app.py CHANGED

@@ -1,7 +1,7 @@
 import os
 
 import torch
-import spaces
+# import spaces
 import safetensors
 import gradio as gr
 from PIL import Image
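v0.2 simply comments out the ZeroGPU integration. If the intent is to keep the Space deployable on ZeroGPU while also running locally, a guarded import is a common alternative; a minimal sketch, not part of this commit:

```python
# Sketch only: degrade to a no-op decorator when the Hugging Face
# `spaces` package is not installed (i.e. outside a ZeroGPU Space).
try:
    import spaces
    gpu = spaces.GPU
except ImportError:
    def gpu(fn):
        return fn  # plain passthrough on local machines
```

`generate_image` below could then stay decorated with `@gpu` in both environments.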
@@ -16,7 +16,7 @@ from transformer_flux_custom import FluxTransformer2DModel as FluxTransformer2DM
 
 model_config = './config.json'
 pretrained_model_name = 'black-forest-labs/FLUX.1-dev'
-adapter_path = 'model.safetensors'
+adapter_path = 'model-v0.2.safetensors'
 adapter_repo_id = "ashen0209/Flux-Character-Consitancy"
 
 conditioner_base_model = 'eva02_large_patch14_448.mim_in22k_ft_in1k'
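Only the adapter filename changes in this hunk; the code that actually fetches and loads the weights sits outside the diff context. For orientation, a file named like this is typically resolved from `adapter_repo_id` with `huggingface_hub` (a sketch, assuming the app loads the adapter roughly this way):

```python
from huggingface_hub import hf_hub_download

# Download (or reuse from the local cache) the v0.2 adapter weights.
local_adapter = hf_hub_download(
    repo_id="ashen0209/Flux-Character-Consitancy",
    filename="model-v0.2.safetensors",
)
```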
@@ -56,11 +56,12 @@ IMAGE_PROCESS_TRANSFORM = transforms.Compose([
     transforms.Normalize(mean=[0.4815, 0.4578, 0.4082], std=[0.2686, 0.2613, 0.276])
 ])
 
-@spaces.GPU
-def generate_image(ref_image, prompt, height=512, width=512, num_steps=25, guidance_scale=3.5, ip_scale=1.0):
+# @spaces.GPU
+def generate_image(ref_image, ref_image2, prompt, height=512, width=512, num_steps=25, guidance_scale=3.5, seed=0, ip_scale=1.0):
+    print(f"ref_image: {ref_image.size}, prompt: {prompt}, height: {height}, width: {width}, num_steps: {num_steps}, guidance_scale: {guidance_scale}, ip_scale: {ip_scale}")
     with torch.no_grad():
         image_refs = map(torch.stack, [
-            [IMAGE_PROCESS_TRANSFORM(i) for i in [ref_image, ]]
+            [IMAGE_PROCESS_TRANSFORM(i) for i in [ref_image, ref_image2] if i is not None]
         ])
         image_refs = [i.to(dtype=torch.bfloat16, device='cuda') for i in image_refs]
         prompt_embeds, pooled_prompt_embeds, txt_ids = pipe.encode_prompt(prompt, prompt)
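The rewritten comprehension filters out a missing second reference before stacking, so the batch dimension tracks however many references were actually uploaded. A self-contained sketch of the same pattern (the `Resize((448, 448))` step is an assumption inferred from the EVA02 conditioner name; only the `Normalize` values appear in the diff):

```python
import torch
from PIL import Image
from torchvision import transforms

transform = transforms.Compose([
    transforms.Resize((448, 448)),  # assumed input size for eva02_large_patch14_448
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4815, 0.4578, 0.4082], std=[0.2686, 0.2613, 0.276]),
])

ref_image = Image.new("RGB", (640, 480))  # stand-in for an uploaded photo
ref_image2 = None                         # optional second reference left empty

batch = torch.stack([transform(i) for i in [ref_image, ref_image2] if i is not None])
print(batch.shape)  # torch.Size([1, 3, 448, 448]); [2, 3, 448, 448] if both are set
```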
@@ -77,21 +78,81 @@ def generate_image(ref_image, prompt, height=512, width=512, num_steps=25, guida
         num_inference_steps=num_steps,
         guidance_scale=guidance_scale,
     ).images[0]
-    return image
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    return image
+
+
+
+examples = [
+    ["assets/ref1.jpg", None, "A woman dancing in the desert", 512, 768],
+    ["assets/ref1.jpg", "assets/ref_cat.jpg", "A woman holding a cat above her head", 768, 512],
+    ["assets/ref2.jpg", None, "A woman sitting on the beach near the sea", 512, 768],
+
+]
+
+with gr.Blocks() as demo:
+    # Top-level inputs that are always visible
+    with gr.Row():
+        gr.Markdown("""
+        ## Character Consistency Image Generation based on Flux
+        """)
+
+    with gr.Row():
+        with gr.Column():
+            with gr.Row():
+                ref_image = gr.Image(type="pil", label="Upload Reference Subject Image", width=300)
+                ref_image2 = gr.Image(type="pil", label="[Optional] complement or different category", width=200)
+            description = gr.Textbox(lines=2, placeholder="Describe the desired contents", label="Description Text")
+            generate_btn = gr.Button("Generate Image")
+
+            # Advanced options hidden inside an accordion (click to expand)
+            with gr.Accordion("Advanced Options", open=False):
+                height_slider = gr.Slider(minimum=256, maximum=1024, value=512, step=64, label="Height")
+                width_slider = gr.Slider(minimum=256, maximum=1024, value=512, step=64, label="Width")
+                steps_slider = gr.Slider(minimum=20, maximum=50, value=25, step=1, label="Number of Steps")
+                guidance_slider = gr.Slider(minimum=1.0, maximum=8.0, value=3.5, step=0.1, label="Guidance Scale")
+                ref_scale_slider = gr.Slider(minimum=0.0, maximum=2.0, value=1.0, step=0.1, label="Reference Image Scale")
+
+        with gr.Column():
+            output = gr.Image(type="pil", label="Generated Image")
+            # with gr.Row():
+            with gr.Group():
+                with gr.Row(equal_height=True):
+                    with gr.Column(scale=1, min_width=50):
+                        randomize_checkbox = gr.Checkbox(label="Randomize Seed", value=True)
+                    with gr.Column(scale=3, min_width=100):
+                        seed_io = gr.Number(label="Seed (if not randomizing)", value=0, interactive=True)
+
+    with gr.Row():
+        gr.Examples(
+            label='Click on following examples to load and try',
+            examples=examples,
+            inputs=[ref_image, ref_image2, description, height_slider, width_slider],
+            fn=generate_image,
+            outputs=output,
+            # example_labels=['Reference Subject', 'Additional Reference', 'Prompt', 'Height', 'Width'],
+            cache_examples=True,
+            cache_mode='lazy'
+        )
+
+    with gr.Row():
+        gr.Markdown("""
+        ### Tips:
+        - Images with human subjects tend to perform better than other categories.
+        - Images where the subject occupies most of the frame with a clean, uncluttered background yield improved results.
+        - Including multiple subjects of the same category may cause blending issues (this is being improved).
+        - Despite these factors, most image inputs still produce reasonable and satisfactory results.
+        """)
+    # When the button is clicked, pass all inputs to generate_image
+    generate_btn.click(
+        fn=generate_image,
+        inputs=[ref_image, ref_image2, description, height_slider, width_slider, steps_slider, guidance_slider, ref_scale_slider],
+        outputs=output,
+    )
+
+
+
+if __name__ == "__main__":
+    demo.launch()
 
 if __name__ == "__main__":
     iface.launch()
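One wiring detail worth flagging: `gr.Button.click` binds `inputs` positionally, and the handler's signature is `(ref_image, ref_image2, prompt, height, width, num_steps, guidance_scale, seed, ip_scale)`. The eight inputs above therefore feed `ref_scale_slider`'s value into `seed`, leaving `ip_scale` at its default of 1.0, while `seed_io` and `randomize_checkbox` are never read. The stale `iface.launch()` guard kept below `demo.launch()` would likewise raise a NameError if reached, since `iface` no longer exists in v0.2. A sketch of keyword-safe wiring for the slider (hypothetical wrapper, not in the commit):

```python
# Sketch only: route the slider into ip_scale by keyword so the unused
# `seed` parameter cannot swallow it positionally.
def on_generate(ref1, ref2, prompt, height, width, steps, cfg, ref_scale):
    return generate_image(ref1, ref2, prompt, height, width, steps, cfg,
                          ip_scale=ref_scale)

generate_btn.click(
    fn=on_generate,
    inputs=[ref_image, ref_image2, description, height_slider, width_slider,
            steps_slider, guidance_slider, ref_scale_slider],
    outputs=output,
)
```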
assets/ref1.jpg ADDED
assets/ref2.jpg ADDED
assets/ref3.jpg ADDED
assets/ref4.jpg ADDED
assets/ref_cat.jpg ADDED