Spaces:

HongcanGuo
/

InspiroV

Running

App Files Community

HongcanGuo committed on May 30, 2024

Commit

d8d4f3c

verified ·

1 Parent(s): fb43b5c

Update app.py

Browse files

Files changed (1) hide show

app.py +7 -35

app.py CHANGED Viewed

@@ -16,8 +16,8 @@ import tempfile
 # 定义图像到文本函数
 def img2text(image):
-    processor = BlipProcessor.from_pretrained("blip-image-captioning-large")
-    model = BlipForConditionalGeneration.from_pretrained("blip-image-captioning-large")
     inputs = processor(image, return_tensors="pt")
     out = model.generate(**inputs)
     caption = processor.decode(out[0], skip_special_tokens=True)
@@ -78,12 +78,12 @@ def text2vid(input_text):
     sentences = re.findall(r'\[\d+\] (.+?)(?:\n|\Z)', input_text)
     # 加载动作适配器和动画扩散管道
-    adapter = MotionAdapter.from_pretrained("/home/u2022211776/jupyterlab/AnimateLCM", config_file="/home/u2022211776/jupyterlab/AnimateLCM/config.json", torch_dtype=torch.float16)
-    pipe = AnimateDiffPipeline.from_pretrained("/home/u2022211776/jupyterlab/epiCRealism", motion_adapter=adapter, torch_dtype=torch.float16)
     pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config, beta_schedule="linear")
     # 加载LoRA权重
-    pipe.load_lora_weights("/home/u2022211776/jupyterlab/AnimateLCM", weight_name="AnimateLCM_sd15_t2v_lora.safetensors", adapter_name="lcm-lora")
     # 设置适配器并启用功能
     try:
@@ -114,34 +114,6 @@ def text2vid(input_text):
-# 定义文本到视频函数
-def text2vid(input_text):
-    sentences = re.findall(r'\[\d+\] (.+?)(?:\n|\Z)', input_text)
-    adapter = MotionAdapter.from_pretrained("AnimateLCM", config_file="AnimateLCM/config.json", torch_dtype=torch.float16)
-    pipe = AnimateDiffPipeline.from_pretrained("epiCRealism", motion_adapter=adapter, torch_dtype=torch.float16)
-    pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config, beta_schedule="linear")
-    pipe.load_lora_weights("AnimateLCM", weight_name="AnimateLCM_sd15_t2v_lora.safetensors", adapter_name="lcm-lora")
-    try:
-        pipe.set_adapters(["lcm-lora"], [0.8])
-    except ValueError as e:
-        print("Ignoring the error:", str(e))
-    pipe.enable_vae_slicing()
-    pipe.enable_model_cpu_offload()
-    video_frames = []
-    for sentence in sentences:
-        output = pipe(
-            prompt=sentence + ", 4k, high resolution",
-            negative_prompt="bad quality, worse quality, low resolution",
-            num_frames=24,
-            guidance_scale=2.0,
-            num_inference_steps=6,
-            generator=torch.Generator("cpu").manual_seed(0)
-        )
-        video_frames.extend(output.frames[0])
-    return video_frames
 def text2text_A(user_input):
     # 设置API密钥和基础URL
     api_key = "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"
@@ -176,8 +148,8 @@ def text2text_A(user_input):
 # 定义文本到音频函数
 def text2audio(text_input, duration_seconds):
-    processor = AutoProcessor.from_pretrained("musicgen-small")
-    model = MusicgenForConditionalGeneration.from_pretrained("musicgen-small")
     inputs = processor(text=[text_input], padding=True, return_tensors="pt")
     max_new_tokens = int((duration_seconds / 5) * 256)
     audio_values = model.generate(**inputs, max_new_tokens=max_new_tokens)

 # 定义图像到文本函数
 def img2text(image):
+    processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
+    model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
     inputs = processor(image, return_tensors="pt")
     out = model.generate(**inputs)
     caption = processor.decode(out[0], skip_special_tokens=True)
     sentences = re.findall(r'\[\d+\] (.+?)(?:\n|\Z)', input_text)
     # 加载动作适配器和动画扩散管道
+    adapter = MotionAdapter.from_pretrained("wangfuyun/AnimateLCM", config_file="wangfuyun/AnimateLCM/AnimateLCM/config.json", torch_dtype=torch.float16)
+    pipe = AnimateDiffPipeline.from_pretrained("emilianJR/epiCRealism", motion_adapter=adapter, torch_dtype=torch.float16)
     pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config, beta_schedule="linear")
     # 加载LoRA权重
+    pipe.load_lora_weights("wangfuyun/AnimateLCM", weight_name="AnimateLCM_sd15_t2v_lora.safetensors", adapter_name="lcm-lora")
     # 设置适配器并启用功能
     try:
 def text2text_A(user_input):
     # 设置API密钥和基础URL
     api_key = "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"
 # 定义文本到音频函数
 def text2audio(text_input, duration_seconds):
+    processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
+    model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
     inputs = processor(text=[text_input], padding=True, return_tensors="pt")
     max_new_tokens = int((duration_seconds / 5) * 256)
     audio_values = model.generate(**inputs, max_new_tokens=max_new_tokens)