HongcanGuo committed on
Commit
06ab368
·
verified ·
1 Parent(s): 38aeb3a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -7
app.py CHANGED
@@ -14,8 +14,8 @@ import os
14
 
15
  # 定义图像到文本函数
16
  def img2text(image):
17
- processor = BlipProcessor.from_pretrained("blip-image-captioning-large")
18
- model = BlipForConditionalGeneration.from_pretrained("blip-image-captioning-large")
19
  inputs = processor(image, return_tensors="pt")
20
  out = model.generate(**inputs)
21
  caption = processor.decode(out[0], skip_special_tokens=True)
@@ -65,10 +65,10 @@ def text2text(user_input):
65
  # 定义文本到视频函数
66
  def text2vid(input_text):
67
  sentences = re.findall(r'\[\d+\] (.+?)(?:\n|\Z)', input_text)
68
- adapter = MotionAdapter.from_pretrained("AnimateLCM", config_file="AnimateLCM/config.json", torch_dtype=torch.float16)
69
- pipe = AnimateDiffPipeline.from_pretrained("epiCRealism", motion_adapter=adapter, torch_dtype=torch.float16)
70
  pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config, beta_schedule="linear")
71
- pipe.load_lora_weights("AnimateLCM", weight_name="AnimateLCM_sd15_t2v_lora.safetensors", adapter_name="lcm-lora")
72
  try:
73
  pipe.set_adapters(["lcm-lora"], [0.8])
74
  except ValueError as e:
@@ -97,8 +97,8 @@ def video_generate():
97
 
98
  # 定义文本到音频函数
99
  def text2audio(text_input, duration_seconds):
100
- processor = AutoProcessor.from_pretrained("musicgen-small")
101
- model = MusicgenForConditionalGeneration.from_pretrained("musicgen-small")
102
  inputs = processor(text=[text_input], padding=True, return_tensors="pt")
103
  max_new_tokens = int((duration_seconds / 5) * 256)
104
  audio_values = model.generate(**inputs, max_new_tokens=max_new_tokens)
 
14
 
15
  # 定义图像到文本函数
16
  def img2text(image):
17
+ processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
18
+ model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
19
  inputs = processor(image, return_tensors="pt")
20
  out = model.generate(**inputs)
21
  caption = processor.decode(out[0], skip_special_tokens=True)
 
65
  # 定义文本到视频函数
66
  def text2vid(input_text):
67
  sentences = re.findall(r'\[\d+\] (.+?)(?:\n|\Z)', input_text)
68
+ adapter = MotionAdapter.from_pretrained("wangfuyun/AnimateLCM", config_file="wangfuyun/AnimateLCM/config.json", torch_dtype=torch.float16)
69
+ pipe = AnimateDiffPipeline.from_pretrained("emilianJR/epiCRealism", motion_adapter=adapter, torch_dtype=torch.float16)
70
  pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config, beta_schedule="linear")
71
+ pipe.load_lora_weights("wangfuyun/AnimateLCM", weight_name="AnimateLCM_sd15_t2v_lora.safetensors", adapter_name="lcm-lora")
72
  try:
73
  pipe.set_adapters(["lcm-lora"], [0.8])
74
  except ValueError as e:
 
97
 
98
  # 定义文本到音频函数
99
  def text2audio(text_input, duration_seconds):
100
+ processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
101
+ model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
102
  inputs = processor(text=[text_input], padding=True, return_tensors="pt")
103
  max_new_tokens = int((duration_seconds / 5) * 256)
104
  audio_values = model.generate(**inputs, max_new_tokens=max_new_tokens)