Update app.py

app.py CHANGED
@@ -314,39 +314,34 @@ def create_readme(info: Dict[str, Any], downloaded_files: Dict[str, Any], user_r
314          link_civit_disclaimer = f'([CivitAI]({original_url}))'
315          non_author_disclaimer = f'This model was originally uploaded on [CivitAI]({original_url}), by [{info["creator"]}](https://civitai.com/user/{info["creator"]}/models). The information below was provided by the author on CivitAI:'
316
317  -      # Tags
318          is_video = info.get("is_video_model", False)
319  -      base_hf_model = info["baseModel"]
320          civitai_bm_name_lower = info.get("civitai_base_model_name", "").lower()
321
322          if is_video:
323              default_tags = ["lora", "diffusers", "migrated", "video"]
324  -          if "template:" not in " ".join(info
325  -              default_tags.append("template:video-lora")
326              if "t2v" in civitai_bm_name_lower or (civitai_bm_name_lower == "hunyuan video" and base_hf_model.endswith("HunyuanVideo")):
327                  default_tags.append("text-to-video")
328              elif "i2v" in civitai_bm_name_lower or (civitai_bm_name_lower == "hunyuan video" and base_hf_model.endswith("HunyuanVideo-I2V")):
329                  default_tags.append("image-to-video")
330          else:
331              default_tags = ["text-to-image", "stable-diffusion", "lora", "diffusers", "migrated"]
332  -          if "template:" not in " ".join(info
333                  default_tags.append("template:sd-lora")
334
335  -
336          civit_tags_raw = info.get("tags", [])
337  -      civit_tags_clean = [t.replace(":", "").strip() for t in civit_tags_raw if t.replace(":", "").strip()]
338  -      # Filter out tags already covered by default_tags logic (e.g. 'text-to-image', 'lora')
339          final_civit_tags = [tag for tag in civit_tags_clean if tag not in default_tags and tag.lower() not in default_tags]
340  -
341          tags = default_tags + final_civit_tags
342  -      unpacked_tags = "\n- ".join(sorted(list(set(tags))))
343
344          trained_words = info.get('trainedWords', [])
345  -      formatted_words = ', '.join(f'`{word}`' for word in trained_words if word)
346          trigger_words_section = f"## Trigger words\nYou should use {formatted_words} to trigger the generation." if formatted_words else ""
347
348          widget_content = ""
349  -      # Limit number of widget items to avoid overly long READMEs, e.g., max 5
350          max_widget_items = 5
351          items_for_widget = list(zip(
352              downloaded_files.get("imagePrompt", []),
@@ -355,70 +350,292 @@ def create_readme(info: Dict[str, Any], downloaded_files: Dict[str, Any], user_r
355          ))[:max_widget_items]
356
357          for index, (prompt, negative_prompt, media_filename) in enumerate(items_for_widget):
358  -          escaped_prompt = prompt.replace("'", "''") if prompt else ' '
359  -
360  -          # Ensure media_filename is just the filename, not a path
361              base_media_filename = os.path.basename(media_filename)
362  -
363              negative_prompt_content = f"negative_prompt: {negative_prompt}\n" if negative_prompt else ""
364              widget_content += f"""- text: '{escaped_prompt}'
365  -      {
366        output:
367          url: >-
368            {base_media_filename}
369      """
370  -
371          if base_hf_model in ["black-forest-labs/FLUX.1-dev", "black-forest-labs/FLUX.1-schnell"]:
372              dtype = "torch.bfloat16"
373          else:
374  -          dtype = "torch.float16"
375
376  -
377  -
378  -
379  -      if items_for_widget and items_for_widget[0][0]: # items_for_widget[0][0] is the prompt of the first media
380  -          main_prompt_for_snippet = items_for_widget[0][0]
381
382  -
383  -
384  -          pipeline_class = "AutoPipelineForTextToVideo" # Default for T2V
385  -          example_input = f"'{main_prompt_for_snippet}'"
386  -          output_name = "video_frames"
387  -          output_access = ".frames"
388  -
389  -          if "I2V" in base_hf_model or "i2v" in civitai_bm_name_lower:
390  -              pipeline_class = "AutoPipelineForVideoToVideo" # Or ImageToVideo if more specific class exists
391  -              example_input = f"prompt='{main_prompt_for_snippet}', image=your_input_image_or_pil" # I2V needs an image
392  -              # For I2V, .frames might still be correct but input changes.
393  -
394  -          # Handle Hunyuan specifically for more accurate snippet if possible
395  -          if "HunyuanVideo" in base_hf_model:
396  -              if base_hf_model.endswith("HunyuanVideo"): # T2V
397  -                  pipeline_class = "HunyuanDiT2V Pipeline" # from hunyuanvideo_community.pipelines.hunyuan_dit_t2v_pipeline import HunyuanDiT2V Pipeline
398  -                  example_input = f"prompt='{main_prompt_for_snippet}', height=576, width=1024, num_frames=16, num_inference_steps=50, guidance_scale=7.5" # Example params
399  -              else: # I2V
400  -                  pipeline_class = "HunyuanDiI2V Pipeline" # from hunyuanvideo_community.pipelines.hunyuan_dit_i2v_pipeline import HunyuanDiI2V Pipeline
401  -                  example_input = f"pil_image, prompt='{main_prompt_for_snippet}', height=576, width=1024, num_frames=16, num_inference_steps=50, guidance_scale=7.5, strength=0.8" # Example params
402
403
404  -
405      ```py
406      # This is a video LoRA. Diffusers usage for video models can vary.
407  -    # You may need to install/import specific pipeline classes.
408  -    #
409  -    from diffusers import {pipeline_class.split()[0]} # Adjust if pipeline_class includes more than just class name
410      import torch
411  -
412
413      device = "cuda" if torch.cuda.is_available() else "cpu"
414  -    # pil_image = ... # Load your input image PIL here if it's an Image-to-Video model
415
416  -    pipeline =
417  -
418
419      # The following generation command is an example and may need adjustments
420  -    # based on the specific pipeline and its required parameters.
421  -    #
422      # For more details, consult the Hugging Face Hub page for {base_hf_model}
423      # and the Diffusers documentation on LoRAs and video pipelines.
424      ```
@@ -432,7 +649,7 @@ import torch
432      device = "cuda" if torch.cuda.is_available() else "cpu"
433
434      pipeline = AutoPipelineForText2Image.from_pretrained('{base_hf_model}', torch_dtype={dtype}).to(device)
435  -
436      image = pipeline('{main_prompt_for_snippet}').images[0]
437      ```
438      """
@@ -440,31 +657,22 @@ image = pipeline('{main_prompt_for_snippet}').images[0]
440          license_map_simple = {
441              "Public Domain": "public-domain",
442              "CreativeML Open RAIL-M": "creativeml-openrail-m",
443  -          "CreativeML Open RAIL++-M": "creativeml-openrail-m",
444              "openrail": "creativeml-openrail-m",
445  -          "SDXL": "sdxl", # This might be a base model, not a license
446  -          # Add more mappings if CivitAI provides other common license names
447          }
448  -
449  -      # "allowCommercialUse": ["Image", "RentCivit", "Rent", "Sell"] or "None", "Sell" etc.
450  -      commercial_use = info.get("allowCommercialUse", "None") # Default to None if not specified
451          license_identifier = "other"
452  -      license_name = "bespoke-lora-trained-license"
453
454  -      # Heuristic for common licenses based on permissions
455          if isinstance(commercial_use, str) and commercial_use.lower() == "none" and not info.get("allowDerivatives", True):
456  -          license_identifier = "creativeml-openrail-m"
457              license_name = "CreativeML OpenRAIL-M"
458  -
459  -      # This is a very permissive license, could be Apache 2.0 or MIT if source code, but for models, 'other' is safer
460  -          pass # Keep bespoke for now
461  -
462          bespoke_license_link = f"https://multimodal.art/civitai-licenses?allowNoCredit={info['allowNoCredit']}&allowCommercialUse={commercial_use[0] if isinstance(commercial_use, list) and commercial_use else (commercial_use if isinstance(commercial_use, str) else 'None')}&allowDerivatives={info['allowDerivatives']}&allowDifferentLicense={info['allowDifferentLicense']}"
463
464  -
465          content = f"""---
466      license: {license_identifier}
467  -    license_name: "{license_name}"
468      license_link: {bespoke_license_link}
469      tags:
470      - {unpacked_tags}
@@ -472,7 +680,8 @@ tags:
472      base_model: {base_hf_model}
473      instance_prompt: {trained_words[0] if trained_words else ''}
474      widget:
475  -    {widget_content}
476
477      # {info["name"]}
478
@@ -496,11 +705,10 @@ For more details, including weighting, merging and fusing LoRAs, check the [docu
496      """
497          readme_content += content + "\n"
498          readme_path = os.path.join(folder, "README.md")
499  -      with open(readme_path, "w", encoding="utf-8") as file:
500              file.write(readme_content)
501          print(f"README.md created at {readme_path}")
502  -      print(f"README.md content
503  -
504
505      def get_creator(username):
506          url = f"https://civitai.com/api/trpc/user.getCreator?input=%7B%22json%22%3A%7B%22username%22%3A%22{username}%22%2C%22authed%22%3Atrue%7D%7D"

314          link_civit_disclaimer = f'([CivitAI]({original_url}))'
315          non_author_disclaimer = f'This model was originally uploaded on [CivitAI]({original_url}), by [{info["creator"]}](https://civitai.com/user/{info["creator"]}/models). The information below was provided by the author on CivitAI:'
316
317          is_video = info.get("is_video_model", False)
318  +      base_hf_model = info["baseModel"] # This is the HF model ID
319          civitai_bm_name_lower = info.get("civitai_base_model_name", "").lower()
320
321          if is_video:
322              default_tags = ["lora", "diffusers", "migrated", "video"]
323  +          if "template:" not in " ".join(info.get("tags", [])):
324  +              default_tags.append("template:video-lora")
325              if "t2v" in civitai_bm_name_lower or (civitai_bm_name_lower == "hunyuan video" and base_hf_model.endswith("HunyuanVideo")):
326                  default_tags.append("text-to-video")
327              elif "i2v" in civitai_bm_name_lower or (civitai_bm_name_lower == "hunyuan video" and base_hf_model.endswith("HunyuanVideo-I2V")):
328                  default_tags.append("image-to-video")
329          else:
330              default_tags = ["text-to-image", "stable-diffusion", "lora", "diffusers", "migrated"]
331  +          if "template:" not in " ".join(info.get("tags", [])):
332                  default_tags.append("template:sd-lora")
333
334          civit_tags_raw = info.get("tags", [])
335  +      civit_tags_clean = [t.replace(":", "").strip() for t in civit_tags_raw if t.replace(":", "").strip()]
336          final_civit_tags = [tag for tag in civit_tags_clean if tag not in default_tags and tag.lower() not in default_tags]
337          tags = default_tags + final_civit_tags
338  +      unpacked_tags = "\n- ".join(sorted(list(set(tags))))
339
340          trained_words = info.get('trainedWords', [])
341  +      formatted_words = ', '.join(f'`{word}`' for word in trained_words if word)
342          trigger_words_section = f"## Trigger words\nYou should use {formatted_words} to trigger the generation." if formatted_words else ""
343
344          widget_content = ""
345          max_widget_items = 5
346          items_for_widget = list(zip(
347              downloaded_files.get("imagePrompt", []),
350          ))[:max_widget_items]
351
352          for index, (prompt, negative_prompt, media_filename) in enumerate(items_for_widget):
353  +          escaped_prompt = prompt.replace("'", "''") if prompt else ' '
354              base_media_filename = os.path.basename(media_filename)
355              negative_prompt_content = f"negative_prompt: {negative_prompt}\n" if negative_prompt else ""
356  +          # Corrected YAML for widget:
357              widget_content += f"""- text: '{escaped_prompt}'
358  +    {negative_prompt}
359        output:
360          url: >-
361            {base_media_filename}
362      """
363  +
364          if base_hf_model in ["black-forest-labs/FLUX.1-dev", "black-forest-labs/FLUX.1-schnell"]:
365              dtype = "torch.bfloat16"
366          else:
367  +          dtype = "torch.float16" # Default for others, Hunyuan examples specify this.
368
369  +      main_prompt_for_snippet_raw = formatted_words if formatted_words else 'Your custom prompt'
370  +      if items_for_widget and items_for_widget[0][0]:
371  +          main_prompt_for_snippet_raw = items_for_widget[0][0]
372
373  +      # Escape single quotes for Python string literals
374  +      main_prompt_for_snippet = main_prompt_for_snippet_raw.replace("'", "\\'")
375
376
377  +      lora_loader_line = f"pipe.load_lora_weights('{user_repo_id}', weight_name='{downloaded_files.get('weightName', ['your_lora.safetensors'])[0]}')"
378  +
379  +      diffusers_example = ""
380  +      if is_video:
381  +          if base_hf_model == "hunyuanvideo-community/HunyuanVideo-I2V":
382  +              diffusers_example = f"""
383  +    ```py
384  +    import torch
385  +    from diffusers import HunyuanVideoImageToVideoPipeline, HunyuanVideoTransformer3DModel
386  +    from diffusers.utils import load_image, export_to_video
387  +
388  +    # Available checkpoints: "hunyuanvideo-community/HunyuanVideo-I2V" and "hunyuanvideo-community/HunyuanVideo-I2V-33ch"
389  +    model_id = "{base_hf_model}"
390  +    transformer = HunyuanVideoTransformer3DModel.from_pretrained(
391  +        model_id, subfolder="transformer", torch_dtype=torch.bfloat16 # Explicitly bfloat16 for transformer
392  +    )
393  +    pipe = HunyuanVideoImageToVideoPipeline.from_pretrained(
394  +        model_id, transformer=transformer, torch_dtype=torch.float16 # float16 for pipeline
395  +    )
396  +    pipe.vae.enable_tiling()
397  +    {lora_loader_line}
398  +    pipe.to("cuda")
399  +
400  +    prompt = "{main_prompt_for_snippet if main_prompt_for_snippet else 'A detailed scene description'}"
401  +    # Replace with your image path or URL
402  +    image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/guitar-man.png"
403  +    image = load_image(image_url)
404  +
405  +    output = pipe(image=image, prompt=prompt).frames[0]
406  +    export_to_video(output, "output.mp4", fps=15)
407  +    ```
408  +    """
409  +          elif base_hf_model == "hunyuanvideo-community/HunyuanVideo":
410  +              diffusers_example = f"""
411  +    ```py
412  +    import torch
413  +    from diffusers import HunyuanVideoPipeline, HunyuanVideoTransformer3DModel
414  +    from diffusers.utils import export_to_video
415  +
416  +    model_id = "{base_hf_model}"
417  +    transformer = HunyuanVideoTransformer3DModel.from_pretrained(
418  +        model_id, subfolder="transformer", torch_dtype=torch.bfloat16
419  +    )
420  +    pipe = HunyuanVideoPipeline.from_pretrained(model_id, transformer=transformer, torch_dtype=torch.float16)
421  +    {lora_loader_line}
422  +    # Enable memory savings
423  +    pipe.vae.enable_tiling()
424  +    pipe.enable_model_cpu_offload() # Optional: if VRAM is limited
425  +
426  +    output = pipe(
427  +        prompt="{main_prompt_for_snippet if main_prompt_for_snippet else 'A cinematic video scene'}",
428  +        height=320, # Adjust as needed
429  +        width=512, # Adjust as needed
430  +        num_frames=61, # Adjust as needed
431  +        num_inference_steps=30, # Adjust as needed
432  +    ).frames[0]
433  +    export_to_video(output, "output.mp4", fps=15)
434  +    ```
435  +    """
436  +          elif base_hf_model == "Lightricks/LTX-Video-0.9.7-dev" or base_hf_model == "Lightricks/LTX-Video-0.9.7-distilled": # Assuming -dev is the one from mapping
437  +              # Note: The LTX example is complex. We'll simplify a bit for a LoRA example.
438  +              # The user might need to adapt the full pipeline if they used the distilled one directly.
439  +              # We assume the LoRA is trained on the main LTX pipeline.
440  +              diffusers_example = f"""
441  +    ```py
442  +    import torch
443  +    from diffusers import LTXConditionPipeline, LTXLatentUpsamplePipeline
444  +    from diffusers.pipelines.ltx.pipeline_ltx_condition import LTXVideoCondition
445  +    from diffusers.utils import export_to_video, load_image, load_video
446  +
447  +    # Use the base LTX model your LoRA was trained on. The example below uses the distilled version.
448  +    # Adjust if your LoRA is for the non-distilled "Lightricks/LTX-Video-0.9.7-dev".
449  +    pipe = LTXConditionPipeline.from_pretrained("Lightricks/LTX-Video-0.9.7-distilled", torch_dtype=torch.bfloat16)
450  +    {lora_loader_line}
451  +    # The LTX upsampler is separate and typically doesn't have LoRAs loaded into it directly.
452  +    pipe_upsample = LTXLatentUpsamplePipeline.from_pretrained("Lightricks/ltxv-spatial-upscaler-0.9.7", vae=pipe.vae, torch_dtype=torch.bfloat16)
453  +
454  +    pipe.to("cuda")
455  +    pipe_upsample.to("cuda")
456  +    pipe.vae.enable_tiling()
457  +
458  +    def round_to_nearest_resolution_acceptable_by_vae(height, width, vae_spatial_compression_ratio):
459  +        height = height - (height % vae_spatial_compression_ratio)
460  +        width = width - (width % vae_spatial_compression_ratio)
461  +        return height, width
462  +
463  +    # Example image for condition (replace with your own)
464  +    image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/penguin.png")
465  +    video_for_condition = load_video(export_to_video([image])) # Create a dummy video for conditioning
466  +    condition1 = LTXVideoCondition(video=video_for_condition, frame_index=0)
467  +
468  +    prompt = "{main_prompt_for_snippet if main_prompt_for_snippet else 'A cute little penguin takes out a book and starts reading it'}"
469  +    negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted" # Example
470  +    expected_height, expected_width = 480, 832 # Target final resolution
471  +    downscale_factor = 2 / 3
472  +    num_frames = 32 # Reduced for quicker example
473  +
474  +    # Part 1. Generate video at smaller resolution
475  +    downscaled_height, downscaled_width = int(expected_height * downscale_factor), int(expected_width * downscale_factor)
476  +    downscaled_height, downscaled_width = round_to_nearest_resolution_acceptable_by_vae(downscaled_height, downscaled_width, pipe.vae_spatial_compression_ratio)
477  +
478  +    latents = pipe(
479  +        conditions=[condition1],
480  +        prompt=prompt,
481  +        negative_prompt=negative_prompt,
482  +        width=downscaled_width,
483  +        height=downscaled_height,
484  +        num_frames=num_frames,
485  +        num_inference_steps=7, # Example steps
486  +        guidance_scale=1.0, # Example guidance
487  +        decode_timestep = 0.05,
488  +        decode_noise_scale = 0.025,
489  +        generator=torch.Generator().manual_seed(0),
490  +        output_type="latent",
491  +    ).frames
492  +
493  +    # Part 2. Upscale generated video
494  +    upscaled_latents = pipe_upsample(
495  +        latents=latents,
496  +        output_type="latent"
497  +    ).frames
498  +
499  +    # Part 3. Denoise the upscaled video (optional, but recommended)
500  +    video_frames = pipe(
501  +        conditions=[condition1],
502  +        prompt=prompt,
503  +        negative_prompt=negative_prompt,
504  +        width=downscaled_width * 2, # Upscaled width
505  +        height=downscaled_height * 2, # Upscaled height
506  +        num_frames=num_frames,
507  +        denoise_strength=0.3,
508  +        num_inference_steps=10,
509  +        guidance_scale=1.0,
510  +        latents=upscaled_latents,
511  +        decode_timestep = 0.05,
512  +        decode_noise_scale = 0.025,
513  +        image_cond_noise_scale=0.025, # if using image condition
514  +        generator=torch.Generator().manual_seed(0),
515  +        output_type="pil",
516  +    ).frames[0]
517  +
518  +    # Part 4. Downscale to target resolution if upscaler overshot
519  +    final_video = [frame.resize((expected_width, expected_height)) for frame in video_frames]
520  +    export_to_video(final_video, "output.mp4", fps=16) # Example fps
521  +    ```
522  +    """
523  +          elif base_hf_model.startswith("Wan-AI/Wan2.1-T2V-"):
524  +              diffusers_example = f"""
525  +    ```py
526  +    import torch
527  +    from diffusers import AutoencoderKLWan, WanPipeline
528  +    from diffusers.utils import export_to_video
529  +
530  +    model_id = "{base_hf_model}"
531  +    vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32) # As per example
532  +    pipe = WanPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)
533  +    {lora_loader_line}
534  +    pipe.to("cuda")
535  +
536  +    prompt = "{main_prompt_for_snippet if main_prompt_for_snippet else 'A cat walks on the grass, realistic'}"
537  +    negative_prompt = "worst quality, low quality, blurry" # Simplified for LoRA example
538  +
539  +    output = pipe(
540  +        prompt=prompt,
541  +        negative_prompt=negative_prompt,
542  +        height=480, # Adjust as needed
543  +        width=832, # Adjust as needed
544  +        num_frames=30, # Adjust for LoRA, original example had 81
545  +        guidance_scale=5.0 # Adjust as needed
546  +    ).frames[0]
547  +    export_to_video(output, "output.mp4", fps=15)
548  +    ```
549  +    """
|
550 |
+
elif base_hf_model.startswith("Wan-AI/Wan2.1-I2V-"):
|
551 |
+
diffusers_example = f"""
|
552 |
+
```py
|
553 |
+
import torch
|
554 |
+
import numpy as np
|
555 |
+
from diffusers import AutoencoderKLWan, WanImageToVideoPipeline
|
556 |
+
from diffusers.utils import export_to_video, load_image
|
557 |
+
from transformers import CLIPVisionModel
|
558 |
+
|
559 |
+
model_id = "{base_hf_model}"
|
560 |
+
# These components are part of the base model, LoRA is loaded into the pipeline
|
561 |
+
image_encoder = CLIPVisionModel.from_pretrained(model_id, subfolder="image_encoder", torch_dtype=torch.float32)
|
562 |
+
vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
|
563 |
+
pipe = WanImageToVideoPipeline.from_pretrained(model_id, vae=vae, image_encoder=image_encoder, torch_dtype=torch.bfloat16)
|
564 |
+
{lora_loader_line}
|
565 |
+
pipe.to("cuda")
|
566 |
+
|
567 |
+
# Replace with your image path or URL
|
568 |
+
image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/astronaut.jpg"
|
569 |
+
image = load_image(image_url)
|
570 |
+
|
571 |
+
# Adjust resolution based on model capabilities (480p or 720p variants)
|
572 |
+
# This is a simplified example; refer to original Wan I2V docs for precise resolution handling
|
573 |
+
if "480P" in model_id:
|
574 |
+
max_height, max_width = 480, 832 # Example for 480p
|
575 |
+
elif "720P" in model_id:
|
576 |
+
max_height, max_width = 720, 1280 # Example for 720p
|
577 |
+
else: # Fallback
|
578 |
+
max_height, max_width = 480, 832
|
579 |
+
|
580 |
+
# Simple resize for example, optimal resizing might need to maintain aspect ratio & VAE constraints
|
581 |
+
h, w = image.height, image.width
|
582 |
+
if w > max_width or h > max_height:
|
583 |
+
aspect_ratio = w / h
|
584 |
+
if w > h:
|
585 |
+
new_w = max_width
|
586 |
+
new_h = int(new_w / aspect_ratio)
|
587 |
+
else:
|
588 |
+
new_h = max_height
|
589 |
+
new_w = int(new_h * aspect_ratio)
|
590 |
+
# Ensure dimensions are divisible by VAE scale factors (typically 8 or 16)
|
591 |
+
# This is a basic adjustment, model specific patch sizes might also matter.
|
592 |
+
patch_size_factor = 16 # Common factor
|
593 |
+
new_h = (new_h // patch_size_factor) * patch_size_factor
|
594 |
+
new_w = (new_w // patch_size_factor) * patch_size_factor
|
595 |
+
if new_h > 0 and new_w > 0:
|
596 |
+
image = image.resize((new_w, new_h))
|
597 |
+
else: # Fallback if calculations lead to zero
|
598 |
+
image = image.resize((max_width//2, max_height//2)) # A smaller safe default
|
599 |
+
else:
|
600 |
+
patch_size_factor = 16
|
601 |
+
h = (h // patch_size_factor) * patch_size_factor
|
602 |
+
w = (w // patch_size_factor) * patch_size_factor
|
603 |
+
if h > 0 and w > 0:
|
604 |
+
image = image.resize((w,h))
|
605 |
+
|
606 |
+
|
607 |
+
prompt = "{main_prompt_for_snippet if main_prompt_for_snippet else 'An astronaut in a dynamic scene'}"
|
608 |
+
negative_prompt = "worst quality, low quality, blurry" # Simplified
|
609 |
+
|
610 |
+
output = pipe(
|
611 |
+
image=image,
|
612 |
+
prompt=prompt,
|
613 |
+
negative_prompt=negative_prompt,
|
614 |
+
height=image.height, # Use resized image height
|
615 |
+
width=image.width, # Use resized image width
|
616 |
+
num_frames=30, # Adjust for LoRA
|
617 |
+
guidance_scale=5.0 # Adjust as needed
|
618 |
+
).frames[0]
|
619 |
+
export_to_video(output, "output.mp4", fps=16)
|
620 |
+
```
|
621 |
+
"""
|
622  +          else: # Fallback for other video LoRAs
623  +              diffusers_example = f"""
624      ```py
625      # This is a video LoRA. Diffusers usage for video models can vary.
626  +    # You may need to install/import specific pipeline classes from diffusers or the model's community.
627  +    # Below is a generic placeholder.
628      import torch
629  +    from diffusers import AutoPipelineForTextToVideo # Or the appropriate video pipeline
630
631      device = "cuda" if torch.cuda.is_available() else "cpu"
632
633  +    pipeline = AutoPipelineForTextToVideo.from_pretrained('{base_hf_model}', torch_dtype={dtype}).to(device)
634  +    {lora_loader_line}
635
636      # The following generation command is an example and may need adjustments
637  +    # based on the specific pipeline and its required parameters for '{base_hf_model}'.
638  +    # video_frames = pipeline(prompt='{main_prompt_for_snippet}', num_frames=16).frames
639      # For more details, consult the Hugging Face Hub page for {base_hf_model}
640      # and the Diffusers documentation on LoRAs and video pipelines.
641      ```

649      device = "cuda" if torch.cuda.is_available() else "cpu"
650
651      pipeline = AutoPipelineForText2Image.from_pretrained('{base_hf_model}', torch_dtype={dtype}).to(device)
652  +    {lora_loader_line}
653      image = pipeline('{main_prompt_for_snippet}').images[0]
654      ```
655      """

657          license_map_simple = {
658              "Public Domain": "public-domain",
659              "CreativeML Open RAIL-M": "creativeml-openrail-m",
660  +          "CreativeML Open RAIL++-M": "creativeml-openrail-m",
661              "openrail": "creativeml-openrail-m",
662          }
663  +      commercial_use = info.get("allowCommercialUse", "None")
664          license_identifier = "other"
665  +      license_name = "bespoke-lora-trained-license"
666
667          if isinstance(commercial_use, str) and commercial_use.lower() == "none" and not info.get("allowDerivatives", True):
668  +          license_identifier = "creativeml-openrail-m"
669              license_name = "CreativeML OpenRAIL-M"
670  +
671          bespoke_license_link = f"https://multimodal.art/civitai-licenses?allowNoCredit={info['allowNoCredit']}&allowCommercialUse={commercial_use[0] if isinstance(commercial_use, list) and commercial_use else (commercial_use if isinstance(commercial_use, str) else 'None')}&allowDerivatives={info['allowDerivatives']}&allowDifferentLicense={info['allowDifferentLicense']}"
672
673          content = f"""---
674      license: {license_identifier}
675  +    license_name: "{license_name}"
676      license_link: {bespoke_license_link}
677      tags:
678      - {unpacked_tags}

680      base_model: {base_hf_model}
681      instance_prompt: {trained_words[0] if trained_words else ''}
682      widget:
683  +    {widget_content.strip()}
684  +    ---
685
686      # {info["name"]}
687

705      """
706          readme_content += content + "\n"
707          readme_path = os.path.join(folder, "README.md")
708  +      with open(readme_path, "w", encoding="utf-8") as file:
709              file.write(readme_content)
710          print(f"README.md created at {readme_path}")
711  +      # print(f"README.md content:\n{readme_content}") # For debugging
712
713      def get_creator(username):
714          url = f"https://civitai.com/api/trpc/user.getCreator?input=%7B%22json%22%3A%7B%22username%22%3A%22{username}%22%2C%22authed%22%3Atrue%7D%7D"
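
For reference, a minimal sketch of what the text-to-image fallback snippet above renders to once the template placeholders ({base_hf_model}, {dtype}, {lora_loader_line}, {main_prompt_for_snippet}) are substituted. The base model id, repo id, weight file name, and prompt below are hypothetical placeholders chosen for illustration, not values taken from this diff.

```py
# Hypothetical rendering of the generated README usage snippet for a text-to-image LoRA.
# 'stabilityai/stable-diffusion-xl-base-1.0', 'your-username/your-lora', and
# 'your_lora.safetensors' are placeholder values.
import torch
from diffusers import AutoPipelineForText2Image

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the base pipeline, then load the migrated LoRA weights into it.
pipeline = AutoPipelineForText2Image.from_pretrained(
    'stabilityai/stable-diffusion-xl-base-1.0', torch_dtype=torch.float16
).to(device)
pipeline.load_lora_weights('your-username/your-lora', weight_name='your_lora.safetensors')

image = pipeline('Your custom prompt').images[0]
```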