zs38 committed
Commit 03e6b18 · 1 Parent(s): 102c482

update v0.2

Files changed (6)
  1. app.py +81 -20
  2. assets/ref1.jpg +0 -0
  3. assets/ref2.jpg +0 -0
  4. assets/ref3.jpg +0 -0
  5. assets/ref4.jpg +0 -0
  6. assets/ref_cat.jpg +0 -0
app.py CHANGED

@@ -1,7 +1,7 @@
 import os
 
 import torch
-import spaces
+# import spaces
 import safetensors
 import gradio as gr
 from PIL import Image
@@ -16,7 +16,7 @@ from transformer_flux_custom import FluxTransformer2DModel as FluxTransformer2DM
 
 model_config = './config.json'
 pretrained_model_name = 'black-forest-labs/FLUX.1-dev'
-adapter_path = 'model.safetensors'
+adapter_path = 'model-v0.2.safetensors'
 adapter_repo_id = "ashen0209/Flux-Character-Consitancy"
 
 conditioner_base_model = 'eva02_large_patch14_448.mim_in22k_ft_in1k'
@@ -56,11 +56,12 @@ IMAGE_PROCESS_TRANSFORM = transforms.Compose([
     transforms.Normalize(mean=[0.4815, 0.4578, 0.4082], std=[0.2686, 0.2613, 0.276])
 ])
 
-@spaces.GPU
-def generate_image(ref_image, prompt, height=512, width=512, num_steps=25, guidance_scale=3.5, ip_scale=1.0):
+# @spaces.GPU
+def generate_image(ref_image, ref_image2, prompt, height=512, width=512, num_steps=25, guidance_scale=3.5, seed=0, ip_scale=1.0):
+    print(f"ref_image: {ref_image.size}, prompt: {prompt}, height: {height}, width: {width}, num_steps: {num_steps}, guidance_scale: {guidance_scale}, ip_scale: {ip_scale}")
     with torch.no_grad():
         image_refs = map(torch.stack, [
-            [IMAGE_PROCESS_TRANSFORM(i) for i in [ref_image, ]]
+            [IMAGE_PROCESS_TRANSFORM(i) for i in [ref_image, ref_image2] if i is not None]
         ])
         image_refs = [i.to(dtype=torch.bfloat16, device='cuda') for i in image_refs]
         prompt_embeds, pooled_prompt_embeds, txt_ids = pipe.encode_prompt(prompt, prompt)
@@ -77,21 +78,81 @@ def generate_image(ref_image, prompt, height=512, width=512, num_steps=25, guida
         num_inference_steps=num_steps,
         guidance_scale=guidance_scale,
     ).images[0]
-    return image
-
-iface = gr.Interface(
-    fn=generate_image,
-    inputs=[
-        gr.Image(type="pil", label="Upload Reference Subject Image"),
-        gr.Textbox(lines=2, placeholder="Describe the desired contents", label="Description Text"),
-        gr.Slider(minimum=256, maximum=1024, value=512, label="Height"),
-        gr.Slider(minimum=256, maximum=1024, value=512, label="Width"),
-        gr.Slider(minimum=20, maximum=50, value=25, label="Number of Steps"),
-        gr.Slider(minimum=1.0, maximum=8.0, value=3.5, label="Guidance Scale"),
-        gr.Slider(minimum=0.0, maximum=2.0, value=1.0, label="Reference image Scale"),
-    ],
-    outputs=gr.Image(type="pil", label="Generated Image"),
-)
+    return image
+
+
+examples = [
+    ["assets/ref1.jpg", None, "A woman dancing in the desert", 512, 768],
+    ["assets/ref1.jpg", "assets/ref_cat.jpg", "A woman holding a cat above her head", 768, 512],
+    ["assets/ref2.jpg", None, "A woman sitting on the beach near the sea", 512, 768],
+]
+
+with gr.Blocks() as demo:
+    # Top-level inputs that are always visible
+    with gr.Row():
+        gr.Markdown("""
+        ## Character Consistency Image Generation based on Flux
+        """)
+
+    with gr.Row():
+        with gr.Column():
+            with gr.Row():
+                ref_image = gr.Image(type="pil", label="Upload Reference Subject Image", width=300)
+                ref_image2 = gr.Image(type="pil", label="[Optional] complement or different category", width=200)
+            description = gr.Textbox(lines=2, placeholder="Describe the desired contents", label="Description Text")
+            generate_btn = gr.Button("Generate Image")
+
+            # Advanced options hidden inside an accordion (click to expand)
+            with gr.Accordion("Advanced Options", open=False):
+                height_slider = gr.Slider(minimum=256, maximum=1024, value=512, step=64, label="Height")
+                width_slider = gr.Slider(minimum=256, maximum=1024, value=512, step=64, label="Width")
+                steps_slider = gr.Slider(minimum=20, maximum=50, value=25, step=1, label="Number of Steps")
+                guidance_slider = gr.Slider(minimum=1.0, maximum=8.0, value=3.5, step=0.1, label="Guidance Scale")
+                ref_scale_slider = gr.Slider(minimum=0.0, maximum=2.0, value=1.0, step=0.1, label="Reference Image Scale")
+
+        with gr.Column():
+            output = gr.Image(type="pil", label="Generated Image")
+            with gr.Group():
+                with gr.Row(equal_height=True):
+                    with gr.Column(scale=1, min_width=50):
+                        randomize_checkbox = gr.Checkbox(label="Randomize Seed", value=True)
+                    with gr.Column(scale=3, min_width=100):
+                        seed_io = gr.Number(label="Seed (if not randomizing)", value=0, interactive=True)
+
+    with gr.Row():
+        gr.Examples(
+            label='Click on the following examples to load and try',
+            examples=examples,
+            inputs=[ref_image, ref_image2, description, height_slider, width_slider],
+            fn=generate_image,
+            outputs=output,
+            # example_labels=['Reference Subject', 'Additional Reference', 'Prompt', 'Height', 'Width'],
+            cache_examples=True,
+            cache_mode='lazy'
+        )
+
+    with gr.Row():
+        gr.Markdown("""
+        ### Tips:
+        - Images with human subjects tend to perform better than other categories.
+        - Images where the subject occupies most of the frame with a clean, uncluttered background yield improved results.
+        - Including multiple subjects of the same category may cause blending issues (this is being improved).
+        - Despite these factors, most image inputs still produce reasonable and satisfactory results.
+        """)
+
+    # When the button is clicked, pass all inputs to generate_image
+    generate_btn.click(
+        fn=generate_image,
+        inputs=[ref_image, ref_image2, description, height_slider, width_slider, steps_slider, guidance_slider, ref_scale_slider],
+        outputs=output,
+    )
+
+
+if __name__ == "__main__":
+    demo.launch()
 
 if __name__ == "__main__":
     iface.launch()
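
The functional core of this update is the optional second reference in generate_image: both PIL inputs are run through IMAGE_PROCESS_TRANSFORM and stacked into a single batch, with None entries filtered out. A minimal sketch of that preprocessing step; only the Normalize stats are visible in this diff, so the Resize/ToTensor stages below are assumptions based on the eva02_large_patch14_448 conditioner named in app.py:

import torch
from torchvision import transforms

# Assumed pipeline: only Normalize appears in the diff; Resize(448) and
# ToTensor are inferred from the eva02_large_patch14_448 conditioner name.
IMAGE_PROCESS_TRANSFORM = transforms.Compose([
    transforms.Resize((448, 448)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4815, 0.4578, 0.4082], std=[0.2686, 0.2613, 0.276]),
])

def preprocess_refs(ref_image, ref_image2=None):
    # Drop the optional second reference when absent, then stack the
    # transformed images into one (N, 3, 448, 448) bfloat16 batch.
    refs = [i for i in (ref_image, ref_image2) if i is not None]
    return torch.stack([IMAGE_PROCESS_TRANSFORM(i) for i in refs]).to(torch.bfloat16)

Because None is filtered before torch.stack, the single-reference path from v0.1 keeps working unchanged: the batch simply has N=1.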
assets/ref1..jpg ADDED
assets/ref2..jpg ADDED
assets/ref3..jpg ADDED
assets/ref4..jpg ADDED
assets/ref_cat..jpg ADDED
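
One loose end worth noting: generate_image now takes seed=0 and the UI adds randomize_checkbox and seed_io, but generate_btn.click never passes them. Worse, Gradio maps click inputs to function parameters positionally, so the eight inputs land on the first eight parameters and ref_scale_slider actually fills the seed slot while ip_scale keeps its default. A hedged sketch of how the seed could be wired; resolve_seed is a hypothetical helper, not part of this commit:

import random
import torch

def resolve_seed(randomize: bool, seed: float) -> torch.Generator:
    # gr.Number delivers floats, so cast before seeding; draw a random
    # seed when the checkbox is ticked. device='cuda' matches app.py.
    value = random.randrange(2**31) if randomize else int(seed)
    return torch.Generator(device='cuda').manual_seed(value)

# The click handler would then place seed_io in the seed parameter's
# position (randomize_checkbox would also need a slot in the signature):
# generate_btn.click(
#     fn=generate_image,
#     inputs=[ref_image, ref_image2, description, height_slider, width_slider,
#             steps_slider, guidance_slider, seed_io, ref_scale_slider],
#     outputs=output,
# )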