Update handler.py

handler.py (CHANGED, +19 -2)
@@ -70,7 +70,7 @@ apply_dirty_hack_to_patch_file_extensions_and_bypass_filter("/repository")
 #print_directory_structure("/repository")
 
 
-def process_input_image(image_data: str, target_width: int, target_height: int) -> Image.Image:
+def process_input_image(image_data: str, target_width: int, target_height: int, input_image_quality: int) -> Image.Image:
     """
     Process input image from base64, resize and crop to target dimensions
 
@@ -78,6 +78,7 @@ def process_input_image(image_data: str, target_width: int, target_height: int)
         image_data: Base64 encoded image data
         target_width: Desired width
         target_height: Desired height
+        input_image_quality: JPEG quality (1-100); values below 100 recompress the image
 
     Returns:
         Processed PIL Image
@@ -127,6 +128,15 @@ def process_input_image(image_data: str, target_width: int, target_height: int)
     bottom = top + target_height
 
     image = image.crop((left, top, right, bottom))
+
+    # Apply JPEG compression if input_image_quality is not 100
+    if input_image_quality < 100:
+        # Save with compression to bytes buffer
+        buffer = io.BytesIO()
+        image.save(buffer, format='JPEG', quality=input_image_quality)
+        buffer.seek(0)
+        # Load compressed image back
+        image = Image.open(buffer)
 
     return image
 
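The block added above degrades the cropped image by round-tripping it through an in-memory JPEG encode. For readers who want to try the trick in isolation, here is a minimal standalone sketch (the helper name and the quality value are illustrative, not part of this commit):

    import io
    from PIL import Image

    def degrade_image(image: Image.Image, quality: int = 70) -> Image.Image:
        # Re-encode as JPEG in memory; quality < 100 introduces the kind of
        # compression artifacts a real video frame would carry.
        # JPEG has no alpha channel, so convert to RGB first (an extra
        # safeguard; the handler assumes the image is already RGB).
        buffer = io.BytesIO()
        image.convert("RGB").save(buffer, format="JPEG", quality=quality)
        buffer.seek(0)
        return Image.open(buffer)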
@@ -146,6 +156,10 @@ class GenerationConfig:
     width: int = 768
     height: int = 416
 
+    # this is a trick we use to convert a "pristine" image into a "dirty" video frame
+    # this helps fool LTX-Video into turning the image into an animated one
+    input_image_quality: int = 100
+
     # users may tend to always set this to the max, to get as much usable content as possible (which is MAX_FRAMES, i.e. 257).
     # The value must be a multiple of 8, plus 1 frame.
     # visual glitches appear after about 169 frames, so we don't need more actually
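The "multiple of 8, plus 1" rule mentioned in these comments is easy to enforce before generation; a hypothetical clamp (not part of this commit) could look like:

    def clamp_num_frames(requested: int, max_frames: int = 257) -> int:
        # Cap at MAX_FRAMES, then round down to the nearest (multiple of 8) + 1
        frames = min(requested, max_frames)
        return ((frames - 1) // 8) * 8 + 1

    clamp_num_frames(300)  # -> 257
    clamp_num_frames(130)  # -> 129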
@@ -319,6 +333,7 @@ class EndpointHandler:
         - negative_prompt (optional, string): list of concepts to ignore in the video.
         - width (optional, int, defaults to 768): width, or horizontal size in pixels.
         - height (optional, int, defaults to 512): height, or vertical size in pixels.
+        - input_image_quality (optional, int, defaults to 100): a trick we use to convert a "pristine" image into a "dirty" video frame. This helps fool LTX-Video into turning the image into an animated one.
         - num_frames (optional, int, defaults to 129): the number of frames must be a multiple of 8, plus 1 frame.
         - guidance_scale (optional, float, defaults to 3.5): guidance scale (values between 3.0 and 4.0 work well)
         - num_inference_steps (optional, int, defaults to 50): number of inference steps
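Assuming the usual Hugging Face Inference Endpoints envelope (the "inputs"/"parameters" split below is an assumption, not shown in this diff), a request exercising the new parameter might look like:

    payload = {
        "inputs": "a cat walking through tall grass",  # illustrative prompt
        "parameters": {
            "width": 768,
            "height": 416,
            "input_image_quality": 70,  # below 100 triggers the JPEG round trip
            "num_frames": 129,          # a multiple of 8, plus 1
            "guidance_scale": 3.5,
            "num_inference_steps": 50,
        },
    }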
@@ -362,6 +377,7 @@ class EndpointHandler:
             # video model settings (will be used during generation of the initial raw video clip)
             width=params.get("width", GenerationConfig.width),
             height=params.get("height", GenerationConfig.height),
+            input_image_quality=params.get("input_image_quality", GenerationConfig.input_image_quality),
             num_frames=params.get("num_frames", GenerationConfig.num_frames),
             guidance_scale=params.get("guidance_scale", GenerationConfig.guidance_scale),
             num_inference_steps=params.get("num_inference_steps", GenerationConfig.num_inference_steps),
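Each params.get(key, GenerationConfig.attr) call reads a request parameter and falls back to the class-level default, so the config class doubles as the defaults table. A condensed illustration (the dataclass decorator is assumed from the attribute syntax shown above):

    from dataclasses import dataclass

    @dataclass
    class GenerationConfig:
        width: int = 768
        height: int = 416
        input_image_quality: int = 100

    params = {"width": 1024, "input_image_quality": 70}  # parsed from a request
    config = GenerationConfig(
        width=params.get("width", GenerationConfig.width),
        height=params.get("height", GenerationConfig.height),
        input_image_quality=params.get("input_image_quality", GenerationConfig.input_image_quality),
    )
    # -> GenerationConfig(width=1024, height=416, input_image_quality=70)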
@@ -422,7 +438,8 @@ class EndpointHandler:
             processed_image = process_input_image(
                 input_image,
                 config.width,
-                config.height
+                config.height,
+                config.input_image_quality,
             )
             generation_kwargs["image"] = processed_image
             frames = self.image_to_video(**generation_kwargs).frames
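Putting the pieces together, a hypothetical local check of the updated helper (assuming, per its docstring, that it accepts a bare base64 string; the test image is fabricated on the fly and not part of this commit):

    import base64
    import io
    from PIL import Image

    # Build a small in-memory PNG and base64-encode it, the way a client would
    img = Image.new("RGB", (1024, 576), color=(120, 180, 90))
    buf = io.BytesIO()
    img.save(buf, format="PNG")
    image_data = base64.b64encode(buf.getvalue()).decode()

    processed = process_input_image(image_data, 768, 416, input_image_quality=70)
    assert processed.size == (768, 416)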