Update app.py

app.py CHANGED
--- app.py (before)

@@ -314,39 +314,34 @@ def create_readme(info: Dict[str, Any], downloaded_files: Dict[str, Any], user_r
  314 | link_civit_disclaimer = f'([CivitAI]({original_url}))'
  315 | non_author_disclaimer = f'This model was originally uploaded on [CivitAI]({original_url}), by [{info["creator"]}](https://civitai.com/user/{info["creator"]}/models). The information below was provided by the author on CivitAI:'
  316 |
- 317 | # Tags
  318 | is_video = info.get("is_video_model", False)
- 319 | base_hf_model = info["baseModel"]
  320 | civitai_bm_name_lower = info.get("civitai_base_model_name", "").lower()
  321 |
  322 | if is_video:
  323 | default_tags = ["lora", "diffusers", "migrated", "video"]
- 324 | if "template:" not in " ".join(info
- 325 | default_tags.append("template:video-lora")
  326 | if "t2v" in civitai_bm_name_lower or (civitai_bm_name_lower == "hunyuan video" and base_hf_model.endswith("HunyuanVideo")):
  327 | default_tags.append("text-to-video")
  328 | elif "i2v" in civitai_bm_name_lower or (civitai_bm_name_lower == "hunyuan video" and base_hf_model.endswith("HunyuanVideo-I2V")):
  329 | default_tags.append("image-to-video")
  330 | else:
  331 | default_tags = ["text-to-image", "stable-diffusion", "lora", "diffusers", "migrated"]
- 332 | if "template:" not in " ".join(info
  333 | default_tags.append("template:sd-lora")
  334 |
- 335 |
  336 | civit_tags_raw = info.get("tags", [])
- 337 | civit_tags_clean = [t.replace(":", "").strip() for t in civit_tags_raw if t.replace(":", "").strip()]
- 338 | # Filter out tags already covered by default_tags logic (e.g. 'text-to-image', 'lora')
  339 | final_civit_tags = [tag for tag in civit_tags_clean if tag not in default_tags and tag.lower() not in default_tags]
- 340 |
  341 | tags = default_tags + final_civit_tags
- 342 | unpacked_tags = "\n- ".join(sorted(list(set(tags))))
  343 |
  344 | trained_words = info.get('trainedWords', [])
- 345 | formatted_words = ', '.join(f'`{word}`' for word in trained_words if word)
  346 | trigger_words_section = f"## Trigger words\nYou should use {formatted_words} to trigger the generation." if formatted_words else ""
  347 |
  348 | widget_content = ""
- 349 | # Limit number of widget items to avoid overly long READMEs, e.g., max 5
  350 | max_widget_items = 5
  351 | items_for_widget = list(zip(
  352 | downloaded_files.get("imagePrompt", []),

@@ -355,70 +350,292 @@ def create_readme(info: Dict[str, Any], downloaded_files: Dict[str, Any], user_r
  355 | ))[:max_widget_items]
  356 |
  357 | for index, (prompt, negative_prompt, media_filename) in enumerate(items_for_widget):
- 358 | escaped_prompt = prompt.replace("'", "''") if prompt else ' '
- 359 |
- 360 | # Ensure media_filename is just the filename, not a path
  361 | base_media_filename = os.path.basename(media_filename)
- 362 |
  363 | negative_prompt_content = f"negative_prompt: {negative_prompt}\n" if negative_prompt else ""
  364 | widget_content += f"""- text: '{escaped_prompt}'
- 365 | {
  366 | output:
  367 | url: >-
  368 | {base_media_filename}
  369 | """
- 370 |
  371 | if base_hf_model in ["black-forest-labs/FLUX.1-dev", "black-forest-labs/FLUX.1-schnell"]:
  372 | dtype = "torch.bfloat16"
  373 | else:
- 374 | dtype = "torch.float16"
  375 |
- 376 |
- 377 |
- 378 |
- 379 | if items_for_widget and items_for_widget[0][0]: # items_for_widget[0][0] is the prompt of the first media
- 380 | main_prompt_for_snippet = items_for_widget[0][0]
  381 |
- 382 |
- 383 |
- 384 | pipeline_class = "AutoPipelineForTextToVideo" # Default for T2V
- 385 | example_input = f"'{main_prompt_for_snippet}'"
- 386 | output_name = "video_frames"
- 387 | output_access = ".frames"
- 388 |
- 389 | if "I2V" in base_hf_model or "i2v" in civitai_bm_name_lower:
- 390 | pipeline_class = "AutoPipelineForVideoToVideo" # Or ImageToVideo if more specific class exists
- 391 | example_input = f"prompt='{main_prompt_for_snippet}', image=your_input_image_or_pil" # I2V needs an image
- 392 | # For I2V, .frames might still be correct but input changes.
- 393 |
- 394 | # Handle Hunyuan specifically for more accurate snippet if possible
- 395 | if "HunyuanVideo" in base_hf_model:
- 396 | if base_hf_model.endswith("HunyuanVideo"): # T2V
- 397 | pipeline_class = "HunyuanDiT2V Pipeline" # from hunyuanvideo_community.pipelines.hunyuan_dit_t2v_pipeline import HunyuanDiT2V Pipeline
- 398 | example_input = f"prompt='{main_prompt_for_snippet}', height=576, width=1024, num_frames=16, num_inference_steps=50, guidance_scale=7.5" # Example params
- 399 | else: # I2V
- 400 | pipeline_class = "HunyuanDiI2V Pipeline" # from hunyuanvideo_community.pipelines.hunyuan_dit_i2v_pipeline import HunyuanDiI2V Pipeline
- 401 | example_input = f"pil_image, prompt='{main_prompt_for_snippet}', height=576, width=1024, num_frames=16, num_inference_steps=50, guidance_scale=7.5, strength=0.8" # Example params
  402 |
  403 |
- 404 |
  405 | ```py
  406 | # This is a video LoRA. Diffusers usage for video models can vary.
- 407 | # You may need to install/import specific pipeline classes.
- 408 | #
- 409 | from diffusers import {pipeline_class.split()[0]} # Adjust if pipeline_class includes more than just class name
  410 | import torch
- 411 |
  412 |
  413 | device = "cuda" if torch.cuda.is_available() else "cpu"
- 414 | # pil_image = ... # Load your input image PIL here if it's an Image-to-Video model
  415 |
- 416 | pipeline =
- 417 |
  418 |
  419 | # The following generation command is an example and may need adjustments
- 420 | # based on the specific pipeline and its required parameters.
- 421 | #
  422 | # For more details, consult the Hugging Face Hub page for {base_hf_model}
  423 | # and the Diffusers documentation on LoRAs and video pipelines.
  424 | ```

@@ -432,7 +649,7 @@ import torch
  432 | device = "cuda" if torch.cuda.is_available() else "cpu"
  433 |
  434 | pipeline = AutoPipelineForText2Image.from_pretrained('{base_hf_model}', torch_dtype={dtype}).to(device)
- 435 |
  436 | image = pipeline('{main_prompt_for_snippet}').images[0]
  437 | ```
  438 | """

@@ -440,31 +657,22 @@ image = pipeline('{main_prompt_for_snippet}').images[0]
  440 | license_map_simple = {
  441 | "Public Domain": "public-domain",
  442 | "CreativeML Open RAIL-M": "creativeml-openrail-m",
- 443 | "CreativeML Open RAIL++-M": "creativeml-openrail-m",
  444 | "openrail": "creativeml-openrail-m",
- 445 | "SDXL": "sdxl", # This might be a base model, not a license
- 446 | # Add more mappings if CivitAI provides other common license names
  447 | }
- 448 |
- 449 | # "allowCommercialUse": ["Image", "RentCivit", "Rent", "Sell"] or "None", "Sell" etc.
- 450 | commercial_use = info.get("allowCommercialUse", "None") # Default to None if not specified
  451 | license_identifier = "other"
- 452 | license_name = "bespoke-lora-trained-license"
  453 |
- 454 | # Heuristic for common licenses based on permissions
  455 | if isinstance(commercial_use, str) and commercial_use.lower() == "none" and not info.get("allowDerivatives", True):
- 456 | license_identifier = "creativeml-openrail-m"
  457 | license_name = "CreativeML OpenRAIL-M"
- 458 |
- 459 | # This is a very permissive license, could be Apache 2.0 or MIT if source code, but for models, 'other' is safer
- 460 | pass # Keep bespoke for now
- 461 |
  462 | bespoke_license_link = f"https://multimodal.art/civitai-licenses?allowNoCredit={info['allowNoCredit']}&allowCommercialUse={commercial_use[0] if isinstance(commercial_use, list) and commercial_use else (commercial_use if isinstance(commercial_use, str) else 'None')}&allowDerivatives={info['allowDerivatives']}&allowDifferentLicense={info['allowDifferentLicense']}"
  463 |
- 464 |
  465 | content = f"""---
  466 | license: {license_identifier}
- 467 | license_name: "{license_name}"
  468 | license_link: {bespoke_license_link}
  469 | tags:
  470 | - {unpacked_tags}

@@ -472,7 +680,8 @@ tags:
  472 | base_model: {base_hf_model}
  473 | instance_prompt: {trained_words[0] if trained_words else ''}
  474 | widget:
- 475 | {widget_content}
  476 |
  477 | # {info["name"]}
  478 |

@@ -496,11 +705,10 @@ For more details, including weighting, merging and fusing LoRAs, check the [docu
  496 | """
  497 | readme_content += content + "\n"
  498 | readme_path = os.path.join(folder, "README.md")
- 499 | with open(readme_path, "w", encoding="utf-8") as file:
  500 | file.write(readme_content)
  501 | print(f"README.md created at {readme_path}")
- 502 | print(f"README.md content
- 503 |
  504 |
  505 | def get_creator(username):
  506 | url = f"https://civitai.com/api/trpc/user.getCreator?input=%7B%22json%22%3A%7B%22username%22%3A%22{username}%22%2C%22authed%22%3Atrue%7D%7D"
+++ app.py (after)

@@ -314,39 +314,34 @@ def create_readme(info: Dict[str, Any], downloaded_files: Dict[str, Any], user_r
  314 | link_civit_disclaimer = f'([CivitAI]({original_url}))'
  315 | non_author_disclaimer = f'This model was originally uploaded on [CivitAI]({original_url}), by [{info["creator"]}](https://civitai.com/user/{info["creator"]}/models). The information below was provided by the author on CivitAI:'
  316 |
  317 | is_video = info.get("is_video_model", False)
+ 318 | base_hf_model = info["baseModel"] # This is the HF model ID
  319 | civitai_bm_name_lower = info.get("civitai_base_model_name", "").lower()
  320 |
  321 | if is_video:
  322 | default_tags = ["lora", "diffusers", "migrated", "video"]
+ 323 | if "template:" not in " ".join(info.get("tags", [])):
+ 324 | default_tags.append("template:video-lora")
  325 | if "t2v" in civitai_bm_name_lower or (civitai_bm_name_lower == "hunyuan video" and base_hf_model.endswith("HunyuanVideo")):
  326 | default_tags.append("text-to-video")
  327 | elif "i2v" in civitai_bm_name_lower or (civitai_bm_name_lower == "hunyuan video" and base_hf_model.endswith("HunyuanVideo-I2V")):
  328 | default_tags.append("image-to-video")
  329 | else:
  330 | default_tags = ["text-to-image", "stable-diffusion", "lora", "diffusers", "migrated"]
+ 331 | if "template:" not in " ".join(info.get("tags", [])):
  332 | default_tags.append("template:sd-lora")
  333 |
  334 | civit_tags_raw = info.get("tags", [])
+ 335 | civit_tags_clean = [t.replace(":", "").strip() for t in civit_tags_raw if t.replace(":", "").strip()]
  336 | final_civit_tags = [tag for tag in civit_tags_clean if tag not in default_tags and tag.lower() not in default_tags]
  337 | tags = default_tags + final_civit_tags
+ 338 | unpacked_tags = "\n- ".join(sorted(list(set(tags))))
  339 |
  340 | trained_words = info.get('trainedWords', [])
+ 341 | formatted_words = ', '.join(f'`{word}`' for word in trained_words if word)
  342 | trigger_words_section = f"## Trigger words\nYou should use {formatted_words} to trigger the generation." if formatted_words else ""
  343 |
  344 | widget_content = ""
  345 | max_widget_items = 5
  346 | items_for_widget = list(zip(
  347 | downloaded_files.get("imagePrompt", []),

@@ -355,70 +350,292 @@ def create_readme(info: Dict[str, Any], downloaded_files: Dict[str, Any], user_r
  350 | ))[:max_widget_items]
  351 |
  352 | for index, (prompt, negative_prompt, media_filename) in enumerate(items_for_widget):
+ 353 | escaped_prompt = prompt.replace("'", "''") if prompt else ' '
  354 | base_media_filename = os.path.basename(media_filename)
  355 | negative_prompt_content = f"negative_prompt: {negative_prompt}\n" if negative_prompt else ""
+ 356 | # Corrected YAML for widget:
  357 | widget_content += f"""- text: '{escaped_prompt}'
+ 358 | {negative_prompt}
  359 | output:
  360 | url: >-
  361 | {base_media_filename}
  362 | """
+ 363 |
  364 | if base_hf_model in ["black-forest-labs/FLUX.1-dev", "black-forest-labs/FLUX.1-schnell"]:
  365 | dtype = "torch.bfloat16"
  366 | else:
+ 367 | dtype = "torch.float16" # Default for others, Hunyuan examples specify this.
  368 |
+ 369 | main_prompt_for_snippet_raw = formatted_words if formatted_words else 'Your custom prompt'
+ 370 | if items_for_widget and items_for_widget[0][0]:
+ 371 | main_prompt_for_snippet_raw = items_for_widget[0][0]
  372 |
+ 373 | # Escape single quotes for Python string literals
+ 374 | main_prompt_for_snippet = main_prompt_for_snippet_raw.replace("'", "\\'")
  375 |
  376 |
+ 377 | lora_loader_line = f"pipe.load_lora_weights('{user_repo_id}', weight_name='{downloaded_files.get('weightName', ['your_lora.safetensors'])[0]}')"
+ 378 |
+ 379 | diffusers_example = ""
+ 380 | if is_video:
+ 381 | if base_hf_model == "hunyuanvideo-community/HunyuanVideo-I2V":
+ 382 | diffusers_example = f"""
+ 383 | ```py
+ 384 | import torch
+ 385 | from diffusers import HunyuanVideoImageToVideoPipeline, HunyuanVideoTransformer3DModel
+ 386 | from diffusers.utils import load_image, export_to_video
+ 387 |
+ 388 | # Available checkpoints: "hunyuanvideo-community/HunyuanVideo-I2V" and "hunyuanvideo-community/HunyuanVideo-I2V-33ch"
+ 389 | model_id = "{base_hf_model}"
+ 390 | transformer = HunyuanVideoTransformer3DModel.from_pretrained(
+ 391 | model_id, subfolder="transformer", torch_dtype=torch.bfloat16 # Explicitly bfloat16 for transformer
+ 392 | )
+ 393 | pipe = HunyuanVideoImageToVideoPipeline.from_pretrained(
+ 394 | model_id, transformer=transformer, torch_dtype=torch.float16 # float16 for pipeline
+ 395 | )
+ 396 | pipe.vae.enable_tiling()
+ 397 | {lora_loader_line}
+ 398 | pipe.to("cuda")
+ 399 |
+ 400 | prompt = "{main_prompt_for_snippet if main_prompt_for_snippet else 'A detailed scene description'}"
+ 401 | # Replace with your image path or URL
+ 402 | image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/guitar-man.png"
+ 403 | image = load_image(image_url)
+ 404 |
+ 405 | output = pipe(image=image, prompt=prompt).frames[0]
+ 406 | export_to_video(output, "output.mp4", fps=15)
+ 407 | ```
+ 408 | """
+ 409 | elif base_hf_model == "hunyuanvideo-community/HunyuanVideo":
+ 410 | diffusers_example = f"""
+ 411 | ```py
+ 412 | import torch
+ 413 | from diffusers import HunyuanVideoPipeline, HunyuanVideoTransformer3DModel
+ 414 | from diffusers.utils import export_to_video
+ 415 |
+ 416 | model_id = "{base_hf_model}"
+ 417 | transformer = HunyuanVideoTransformer3DModel.from_pretrained(
+ 418 | model_id, subfolder="transformer", torch_dtype=torch.bfloat16
+ 419 | )
+ 420 | pipe = HunyuanVideoPipeline.from_pretrained(model_id, transformer=transformer, torch_dtype=torch.float16)
+ 421 | {lora_loader_line}
+ 422 | # Enable memory savings
+ 423 | pipe.vae.enable_tiling()
+ 424 | pipe.enable_model_cpu_offload() # Optional: if VRAM is limited
+ 425 |
+ 426 | output = pipe(
+ 427 | prompt="{main_prompt_for_snippet if main_prompt_for_snippet else 'A cinematic video scene'}",
+ 428 | height=320, # Adjust as needed
+ 429 | width=512, # Adjust as needed
+ 430 | num_frames=61, # Adjust as needed
+ 431 | num_inference_steps=30, # Adjust as needed
+ 432 | ).frames[0]
+ 433 | export_to_video(output, "output.mp4", fps=15)
+ 434 | ```
+ 435 | """
+ 436 | elif base_hf_model == "Lightricks/LTX-Video-0.9.7-dev" or base_hf_model == "Lightricks/LTX-Video-0.9.7-distilled": # Assuming -dev is the one from mapping
+ 437 | # Note: The LTX example is complex. We'll simplify a bit for a LoRA example.
+ 438 | # The user might need to adapt the full pipeline if they used the distilled one directly.
+ 439 | # We assume the LoRA is trained on the main LTX pipeline.
+ 440 | diffusers_example = f"""
+ 441 | ```py
+ 442 | import torch
+ 443 | from diffusers import LTXConditionPipeline, LTXLatentUpsamplePipeline
+ 444 | from diffusers.pipelines.ltx.pipeline_ltx_condition import LTXVideoCondition
+ 445 | from diffusers.utils import export_to_video, load_image, load_video
+ 446 |
+ 447 | # Use the base LTX model your LoRA was trained on. The example below uses the distilled version.
+ 448 | # Adjust if your LoRA is for the non-distilled "Lightricks/LTX-Video-0.9.7-dev".
+ 449 | pipe = LTXConditionPipeline.from_pretrained("Lightricks/LTX-Video-0.9.7-distilled", torch_dtype=torch.bfloat16)
+ 450 | {lora_loader_line}
+ 451 | # The LTX upsampler is separate and typically doesn't have LoRAs loaded into it directly.
+ 452 | pipe_upsample = LTXLatentUpsamplePipeline.from_pretrained("Lightricks/ltxv-spatial-upscaler-0.9.7", vae=pipe.vae, torch_dtype=torch.bfloat16)
+ 453 |
+ 454 | pipe.to("cuda")
+ 455 | pipe_upsample.to("cuda")
+ 456 | pipe.vae.enable_tiling()
+ 457 |
+ 458 | def round_to_nearest_resolution_acceptable_by_vae(height, width, vae_spatial_compression_ratio):
+ 459 | height = height - (height % vae_spatial_compression_ratio)
+ 460 | width = width - (width % vae_spatial_compression_ratio)
+ 461 | return height, width
+ 462 |
+ 463 | # Example image for condition (replace with your own)
+ 464 | image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/penguin.png")
+ 465 | video_for_condition = load_video(export_to_video([image])) # Create a dummy video for conditioning
+ 466 | condition1 = LTXVideoCondition(video=video_for_condition, frame_index=0)
+ 467 |
+ 468 | prompt = "{main_prompt_for_snippet if main_prompt_for_snippet else 'A cute little penguin takes out a book and starts reading it'}"
+ 469 | negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted" # Example
+ 470 | expected_height, expected_width = 480, 832 # Target final resolution
+ 471 | downscale_factor = 2 / 3
+ 472 | num_frames = 32 # Reduced for quicker example
+ 473 |
+ 474 | # Part 1. Generate video at smaller resolution
+ 475 | downscaled_height, downscaled_width = int(expected_height * downscale_factor), int(expected_width * downscale_factor)
+ 476 | downscaled_height, downscaled_width = round_to_nearest_resolution_acceptable_by_vae(downscaled_height, downscaled_width, pipe.vae_spatial_compression_ratio)
+ 477 |
+ 478 | latents = pipe(
+ 479 | conditions=[condition1],
+ 480 | prompt=prompt,
+ 481 | negative_prompt=negative_prompt,
+ 482 | width=downscaled_width,
+ 483 | height=downscaled_height,
+ 484 | num_frames=num_frames,
+ 485 | num_inference_steps=7, # Example steps
+ 486 | guidance_scale=1.0, # Example guidance
+ 487 | decode_timestep = 0.05,
+ 488 | decode_noise_scale = 0.025,
+ 489 | generator=torch.Generator().manual_seed(0),
+ 490 | output_type="latent",
+ 491 | ).frames
+ 492 |
+ 493 | # Part 2. Upscale generated video
+ 494 | upscaled_latents = pipe_upsample(
+ 495 | latents=latents,
+ 496 | output_type="latent"
+ 497 | ).frames
+ 498 |
+ 499 | # Part 3. Denoise the upscaled video (optional, but recommended)
+ 500 | video_frames = pipe(
+ 501 | conditions=[condition1],
+ 502 | prompt=prompt,
+ 503 | negative_prompt=negative_prompt,
+ 504 | width=downscaled_width * 2, # Upscaled width
+ 505 | height=downscaled_height * 2, # Upscaled height
+ 506 | num_frames=num_frames,
+ 507 | denoise_strength=0.3,
+ 508 | num_inference_steps=10,
+ 509 | guidance_scale=1.0,
+ 510 | latents=upscaled_latents,
+ 511 | decode_timestep = 0.05,
+ 512 | decode_noise_scale = 0.025,
+ 513 | image_cond_noise_scale=0.025, # if using image condition
+ 514 | generator=torch.Generator().manual_seed(0),
+ 515 | output_type="pil",
+ 516 | ).frames[0]
+ 517 |
+ 518 | # Part 4. Downscale to target resolution if upscaler overshot
+ 519 | final_video = [frame.resize((expected_width, expected_height)) for frame in video_frames]
+ 520 | export_to_video(final_video, "output.mp4", fps=16) # Example fps
+ 521 | ```
+ 522 | """
+ 523 | elif base_hf_model.startswith("Wan-AI/Wan2.1-T2V-"):
+ 524 | diffusers_example = f"""
+ 525 | ```py
+ 526 | import torch
+ 527 | from diffusers import AutoencoderKLWan, WanPipeline
+ 528 | from diffusers.utils import export_to_video
+ 529 |
+ 530 | model_id = "{base_hf_model}"
+ 531 | vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32) # As per example
+ 532 | pipe = WanPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)
+ 533 | {lora_loader_line}
+ 534 | pipe.to("cuda")
+ 535 |
+ 536 | prompt = "{main_prompt_for_snippet if main_prompt_for_snippet else 'A cat walks on the grass, realistic'}"
+ 537 | negative_prompt = "worst quality, low quality, blurry" # Simplified for LoRA example
+ 538 |
+ 539 | output = pipe(
+ 540 | prompt=prompt,
+ 541 | negative_prompt=negative_prompt,
+ 542 | height=480, # Adjust as needed
+ 543 | width=832, # Adjust as needed
+ 544 | num_frames=30, # Adjust for LoRA, original example had 81
+ 545 | guidance_scale=5.0 # Adjust as needed
+ 546 | ).frames[0]
+ 547 | export_to_video(output, "output.mp4", fps=15)
+ 548 | ```
+ 549 | """
+ 550 | elif base_hf_model.startswith("Wan-AI/Wan2.1-I2V-"):
+ 551 | diffusers_example = f"""
+ 552 | ```py
+ 553 | import torch
+ 554 | import numpy as np
+ 555 | from diffusers import AutoencoderKLWan, WanImageToVideoPipeline
+ 556 | from diffusers.utils import export_to_video, load_image
+ 557 | from transformers import CLIPVisionModel
+ 558 |
+ 559 | model_id = "{base_hf_model}"
+ 560 | # These components are part of the base model, LoRA is loaded into the pipeline
+ 561 | image_encoder = CLIPVisionModel.from_pretrained(model_id, subfolder="image_encoder", torch_dtype=torch.float32)
+ 562 | vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
+ 563 | pipe = WanImageToVideoPipeline.from_pretrained(model_id, vae=vae, image_encoder=image_encoder, torch_dtype=torch.bfloat16)
+ 564 | {lora_loader_line}
+ 565 | pipe.to("cuda")
+ 566 |
+ 567 | # Replace with your image path or URL
+ 568 | image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/astronaut.jpg"
+ 569 | image = load_image(image_url)
+ 570 |
+ 571 | # Adjust resolution based on model capabilities (480p or 720p variants)
+ 572 | # This is a simplified example; refer to original Wan I2V docs for precise resolution handling
+ 573 | if "480P" in model_id:
+ 574 | max_height, max_width = 480, 832 # Example for 480p
+ 575 | elif "720P" in model_id:
+ 576 | max_height, max_width = 720, 1280 # Example for 720p
+ 577 | else: # Fallback
+ 578 | max_height, max_width = 480, 832
+ 579 |
+ 580 | # Simple resize for example, optimal resizing might need to maintain aspect ratio & VAE constraints
+ 581 | h, w = image.height, image.width
+ 582 | if w > max_width or h > max_height:
+ 583 | aspect_ratio = w / h
+ 584 | if w > h:
+ 585 | new_w = max_width
+ 586 | new_h = int(new_w / aspect_ratio)
+ 587 | else:
+ 588 | new_h = max_height
+ 589 | new_w = int(new_h * aspect_ratio)
+ 590 | # Ensure dimensions are divisible by VAE scale factors (typically 8 or 16)
+ 591 | # This is a basic adjustment, model specific patch sizes might also matter.
+ 592 | patch_size_factor = 16 # Common factor
+ 593 | new_h = (new_h // patch_size_factor) * patch_size_factor
+ 594 | new_w = (new_w // patch_size_factor) * patch_size_factor
+ 595 | if new_h > 0 and new_w > 0:
+ 596 | image = image.resize((new_w, new_h))
+ 597 | else: # Fallback if calculations lead to zero
+ 598 | image = image.resize((max_width//2, max_height//2)) # A smaller safe default
+ 599 | else:
+ 600 | patch_size_factor = 16
+ 601 | h = (h // patch_size_factor) * patch_size_factor
+ 602 | w = (w // patch_size_factor) * patch_size_factor
+ 603 | if h > 0 and w > 0:
+ 604 | image = image.resize((w,h))
+ 605 |
+ 606 |
+ 607 | prompt = "{main_prompt_for_snippet if main_prompt_for_snippet else 'An astronaut in a dynamic scene'}"
+ 608 | negative_prompt = "worst quality, low quality, blurry" # Simplified
+ 609 |
+ 610 | output = pipe(
+ 611 | image=image,
+ 612 | prompt=prompt,
+ 613 | negative_prompt=negative_prompt,
+ 614 | height=image.height, # Use resized image height
+ 615 | width=image.width, # Use resized image width
+ 616 | num_frames=30, # Adjust for LoRA
+ 617 | guidance_scale=5.0 # Adjust as needed
+ 618 | ).frames[0]
+ 619 | export_to_video(output, "output.mp4", fps=16)
+ 620 | ```
+ 621 | """
+ 622 | else: # Fallback for other video LoRAs
+ 623 | diffusers_example = f"""
  624 | ```py
  625 | # This is a video LoRA. Diffusers usage for video models can vary.
+ 626 | # You may need to install/import specific pipeline classes from diffusers or the model's community.
+ 627 | # Below is a generic placeholder.
  628 | import torch
+ 629 | from diffusers import AutoPipelineForTextToVideo # Or the appropriate video pipeline
  630 |
  631 | device = "cuda" if torch.cuda.is_available() else "cpu"
  632 |
+ 633 | pipeline = AutoPipelineForTextToVideo.from_pretrained('{base_hf_model}', torch_dtype={dtype}).to(device)
+ 634 | {lora_loader_line}
  635 |
  636 | # The following generation command is an example and may need adjustments
+ 637 | # based on the specific pipeline and its required parameters for '{base_hf_model}'.
+ 638 | # video_frames = pipeline(prompt='{main_prompt_for_snippet}', num_frames=16).frames
  639 | # For more details, consult the Hugging Face Hub page for {base_hf_model}
  640 | # and the Diffusers documentation on LoRAs and video pipelines.
  641 | ```
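For reference, a minimal sketch of what the Wan2.1 text-to-video branch above renders into a migrated model card once the `{base_hf_model}`, `{lora_loader_line}`, and prompt placeholders are substituted. The concrete checkpoint `Wan-AI/Wan2.1-T2V-1.3B-Diffusers`, the LoRA repo id `your-username/your-wan-lora`, and the weight name `your_lora.safetensors` are illustrative assumptions, not values taken from this commit.

```py
# Hand-substituted rendering of the Wan2.1 T2V template; repo id and weight name are hypothetical.
import torch
from diffusers import AutoencoderKLWan, WanPipeline
from diffusers.utils import export_to_video

model_id = "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"  # assumed base checkpoint
vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
pipe = WanPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)
pipe.load_lora_weights("your-username/your-wan-lora", weight_name="your_lora.safetensors")
pipe.to("cuda")

prompt = "A cat walks on the grass, realistic"
negative_prompt = "worst quality, low quality, blurry"

# Generate a short clip and write it out as an MP4
output = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    height=480,
    width=832,
    num_frames=30,
    guidance_scale=5.0,
).frames[0]
export_to_video(output, "output.mp4", fps=15)
```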
@@ -432,7 +649,7 @@ import torch
  649 | device = "cuda" if torch.cuda.is_available() else "cpu"
  650 |
  651 | pipeline = AutoPipelineForText2Image.from_pretrained('{base_hf_model}', torch_dtype={dtype}).to(device)
+ 652 | {lora_loader_line}
  653 | image = pipeline('{main_prompt_for_snippet}').images[0]
  654 | ```
  655 | """

@@ -440,31 +657,22 @@ image = pipeline('{main_prompt_for_snippet}').images[0]
  657 | license_map_simple = {
  658 | "Public Domain": "public-domain",
  659 | "CreativeML Open RAIL-M": "creativeml-openrail-m",
+ 660 | "CreativeML Open RAIL++-M": "creativeml-openrail-m",
  661 | "openrail": "creativeml-openrail-m",
  662 | }
+ 663 | commercial_use = info.get("allowCommercialUse", "None")
  664 | license_identifier = "other"
+ 665 | license_name = "bespoke-lora-trained-license"
  666 |
  667 | if isinstance(commercial_use, str) and commercial_use.lower() == "none" and not info.get("allowDerivatives", True):
+ 668 | license_identifier = "creativeml-openrail-m"
  669 | license_name = "CreativeML OpenRAIL-M"
+ 670 |
  671 | bespoke_license_link = f"https://multimodal.art/civitai-licenses?allowNoCredit={info['allowNoCredit']}&allowCommercialUse={commercial_use[0] if isinstance(commercial_use, list) and commercial_use else (commercial_use if isinstance(commercial_use, str) else 'None')}&allowDerivatives={info['allowDerivatives']}&allowDifferentLicense={info['allowDifferentLicense']}"
  672 |
  673 | content = f"""---
  674 | license: {license_identifier}
+ 675 | license_name: "{license_name}"
  676 | license_link: {bespoke_license_link}
  677 | tags:
  678 | - {unpacked_tags}

@@ -472,7 +680,8 @@ tags:
  680 | base_model: {base_hf_model}
  681 | instance_prompt: {trained_words[0] if trained_words else ''}
  682 | widget:
+ 683 | {widget_content.strip()}
+ 684 | ---
  685 |
  686 | # {info["name"]}
  687 |

@@ -496,11 +705,10 @@ For more details, including weighting, merging and fusing LoRAs, check the [docu
  705 | """
  706 | readme_content += content + "\n"
  707 | readme_path = os.path.join(folder, "README.md")
+ 708 | with open(readme_path, "w", encoding="utf-8") as file:
  709 | file.write(readme_content)
  710 | print(f"README.md created at {readme_path}")
+ 711 | # print(f"README.md content:\n{readme_content}") # For debugging
  712 |
  713 | def get_creator(username):
  714 | url = f"https://civitai.com/api/trpc/user.getCreator?input=%7B%22json%22%3A%7B%22username%22%3A%22{username}%22%2C%22authed%22%3Atrue%7D%7D"
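Likewise, a minimal sketch of the image snippet produced by the text-to-image branch (new lines 649-655) after substitution. The base checkpoint `stabilityai/stable-diffusion-xl-base-1.0`, the LoRA repo id `your-username/your-lora`, and the weight name are hypothetical placeholders, not values from this commit.

```py
# Hand-substituted rendering of the text-to-image template; base model and repo id are assumptions.
import torch
from diffusers import AutoPipelineForText2Image

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the assumed SDXL base model and attach the migrated LoRA weights
pipeline = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to(device)
pipeline.load_lora_weights("your-username/your-lora", weight_name="your_lora.safetensors")

# Generate a single image with the LoRA applied
image = pipeline("Your custom prompt").images[0]
image.save("output.png")
```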