Commit 25b26b8 (verified) · committed by jbilcke-hf (HF staff) · Parent: 73b7f0a

Update handler.py

Files changed (1):
  1. handler.py +19 -2
handler.py CHANGED
@@ -70,7 +70,7 @@ apply_dirty_hack_to_patch_file_extensions_and_bypass_filter("/repository")
 #print_directory_structure("/repository")
 
 
-def process_input_image(image_data: str, target_width: int, target_height: int) -> Image.Image:
+def process_input_image(image_data: str, target_width: int, target_height: int, input_image_quality: int) -> Image.Image:
     """
     Process input image from base64, resize and crop to target dimensions
 
@@ -78,6 +78,7 @@ def process_input_image(image_data: str, target_width: int, target_height: int)
         image_data: Base64 encoded image data
         target_width: Desired width
         target_height: Desired height
+        input_image_quality: JPEG quality used to degrade the input (100 keeps it untouched)
 
     Returns:
         Processed PIL Image
@@ -127,6 +128,15 @@ def process_input_image(image_data: str, target_width: int, target_height: int)
     bottom = top + target_height
 
     image = image.crop((left, top, right, bottom))
+
+    # Apply JPEG compression if input_image_quality is not 100
+    if input_image_quality < 100:
+        # Save with compression to bytes buffer
+        buffer = io.BytesIO()
+        image.save(buffer, format='JPEG', quality=input_image_quality)
+        buffer.seek(0)
+        # Load compressed image back
+        image = Image.open(buffer)
 
     return image
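This added block is the heart of the commit: the image is round-tripped through an in-memory JPEG so it picks up compression artifacts before being handed to the model (the block assumes io is already imported in handler.py). A self-contained sketch of the same trick, for trying it outside the handler; the function name and the RGB conversion are mine, the conversion guarding against alpha-channel inputs that JPEG cannot encode:

    import io

    from PIL import Image

    def degrade_to_jpeg_frame(image: Image.Image, quality: int = 70) -> Image.Image:
        """Round-trip a PIL image through an in-memory JPEG to add compression artifacts."""
        buffer = io.BytesIO()
        # JPEG cannot store an alpha channel, so flatten RGBA/LA/P inputs first
        image.convert("RGB").save(buffer, format="JPEG", quality=quality)
        buffer.seek(0)
        return Image.open(buffer)

Lower quality settings produce stronger 8x8 block artifacts, making the still look more like a frame grabbed from a compressed video stream.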
 
@@ -146,6 +156,10 @@ class GenerationConfig:
     width: int = 768
     height: int = 416
 
+    # this is a trick we use to convert a "pristine" image into a "dirty" video frame
+    # this helps fool LTX-Video into turning the image into an animated one
+    input_image_quality: int = 100
+
     # users may tend to always set this to the max, to get as much usable content as possible (which is MAX_FRAMES, i.e. 257).
     # The value must be a multiple of 8, plus 1 frame.
     # visual glitches appear after about 169 frames, so we don't need more actually
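As the comments above state, num_frames must have the form 8k + 1 and MAX_FRAMES is 257. A small helper (not part of the commit) that snaps a requested count to the nearest valid value:

    MAX_FRAMES = 257  # 8 * 32 + 1, per the comment above

    def nearest_valid_num_frames(requested: int, max_frames: int = MAX_FRAMES) -> int:
        """Round to the nearest 8k + 1 value, clamped to [1, max_frames]."""
        k = round((requested - 1) / 8)
        return max(1, min(8 * k + 1, max_frames))

    assert nearest_valid_num_frames(129) == 129  # 8 * 16 + 1, already valid
    assert nearest_valid_num_frames(130) == 129
    assert nearest_valid_num_frames(300) == 257  # clamped to MAX_FRAMES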
@@ -319,6 +333,7 @@ class EndpointHandler:
         - negative_prompt (optional, string): list of concepts to ignore in the video.
         - width (optional, int, defaults to 768): width, or horizontal size in pixels.
         - height (optional, int, defaults to 512): height, or vertical size in pixels.
+        - input_image_quality (optional, int, defaults to 100): this is a trick we use to convert a "pristine" image into a "dirty" video frame. This helps fool LTX-Video into turning the image into an animated one.
         - num_frames (optional, int, defaults to 129): the number of frames must be a multiple of 8, plus 1 frame.
         - guidance_scale (optional, float, defaults to 3.5): Guidance scale (values between 3.0 and 4.0 are nice)
         - num_inference_steps (optional, int, defaults to 50): number of inference steps
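With the new field documented, a request exercising the trick could look like the sketch below. The parameter names come from the docstring above; the prompt/image key names and the overall payload shape are assumptions about how the endpoint is called, not something this diff shows:

    payload = {
        "inputs": {
            "prompt": "a cat walking through tall grass",  # assumed key name
            "image": "<base64-encoded input image>",       # triggers the image-to-video path
        },
        "parameters": {
            "width": 768,
            "height": 416,
            "input_image_quality": 70,   # < 100 switches on the JPEG degradation
            "num_frames": 129,           # 8 * 16 + 1
            "guidance_scale": 3.5,
            "num_inference_steps": 50,
        },
    }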
@@ -362,6 +377,7 @@ class EndpointHandler:
             # video model settings (will be used during generation of the initial raw video clip)
             width=params.get("width", GenerationConfig.width),
             height=params.get("height", GenerationConfig.height),
+            input_image_quality=params.get("input_image_quality", GenerationConfig.input_image_quality),
             num_frames=params.get("num_frames", GenerationConfig.num_frames),
             guidance_scale=params.get("guidance_scale", GenerationConfig.guidance_scale),
             num_inference_steps=params.get("num_inference_steps", GenerationConfig.num_inference_steps),
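The added line follows the pattern of its neighbours: the class-level defaults on GenerationConfig double as fallbacks for request fields the caller omits. A trimmed illustration with a stub config (not the real class):

    from dataclasses import dataclass

    @dataclass
    class GenerationConfig:  # stub with just two of the real fields
        width: int = 768
        input_image_quality: int = 100

    params = {"width": 512}  # request body that omits input_image_quality

    width = params.get("width", GenerationConfig.width)                                # 512, from the request
    quality = params.get("input_image_quality", GenerationConfig.input_image_quality)  # 100, the default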
@@ -422,7 +438,8 @@ class EndpointHandler:
                 processed_image = process_input_image(
                     input_image,
                     config.width,
-                    config.height
+                    config.height,
+                    config.input_image_quality,
                 )
                 generation_kwargs["image"] = processed_image
                 frames = self.image_to_video(**generation_kwargs).frames
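Finally, the call site now threads the quality setting through to process_input_image. Since that function expects base64-encoded image data, producing a valid input_image on the client is a one-liner; a sketch (the helper name and file path are mine):

    import base64

    def encode_image(path: str) -> str:
        """Read a local image file and return it base64-encoded, as process_input_image expects."""
        with open(path, "rb") as f:
            return base64.b64encode(f.read()).decode("utf-8")

    input_image = encode_image("photo.png")  # hypothetical local file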
 