HongcanGuo committed on
Commit
d8d4f3c
·
verified ·
1 Parent(s): fb43b5c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -35
app.py CHANGED
@@ -16,8 +16,8 @@ import tempfile
16
 
17
  # 定义图像到文本函数
18
  def img2text(image):
19
- processor = BlipProcessor.from_pretrained("blip-image-captioning-large")
20
- model = BlipForConditionalGeneration.from_pretrained("blip-image-captioning-large")
21
  inputs = processor(image, return_tensors="pt")
22
  out = model.generate(**inputs)
23
  caption = processor.decode(out[0], skip_special_tokens=True)
@@ -78,12 +78,12 @@ def text2vid(input_text):
78
  sentences = re.findall(r'\[\d+\] (.+?)(?:\n|\Z)', input_text)
79
 
80
  # 加载动作适配器和动画扩散管道
81
- adapter = MotionAdapter.from_pretrained("/home/u2022211776/jupyterlab/AnimateLCM", config_file="/home/u2022211776/jupyterlab/AnimateLCM/config.json", torch_dtype=torch.float16)
82
- pipe = AnimateDiffPipeline.from_pretrained("/home/u2022211776/jupyterlab/epiCRealism", motion_adapter=adapter, torch_dtype=torch.float16)
83
  pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config, beta_schedule="linear")
84
 
85
  # 加载LoRA权重
86
- pipe.load_lora_weights("/home/u2022211776/jupyterlab/AnimateLCM", weight_name="AnimateLCM_sd15_t2v_lora.safetensors", adapter_name="lcm-lora")
87
 
88
  # 设置适配器并启用功能
89
  try:
@@ -114,34 +114,6 @@ def text2vid(input_text):
114
 
115
 
116
 
117
- # 定义文本到视频函数
118
- def text2vid(input_text):
119
- sentences = re.findall(r'\[\d+\] (.+?)(?:\n|\Z)', input_text)
120
- adapter = MotionAdapter.from_pretrained("AnimateLCM", config_file="AnimateLCM/config.json", torch_dtype=torch.float16)
121
- pipe = AnimateDiffPipeline.from_pretrained("epiCRealism", motion_adapter=adapter, torch_dtype=torch.float16)
122
- pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config, beta_schedule="linear")
123
- pipe.load_lora_weights("AnimateLCM", weight_name="AnimateLCM_sd15_t2v_lora.safetensors", adapter_name="lcm-lora")
124
- try:
125
- pipe.set_adapters(["lcm-lora"], [0.8])
126
- except ValueError as e:
127
- print("Ignoring the error:", str(e))
128
- pipe.enable_vae_slicing()
129
- pipe.enable_model_cpu_offload()
130
-
131
- video_frames = []
132
- for sentence in sentences:
133
- output = pipe(
134
- prompt=sentence + ", 4k, high resolution",
135
- negative_prompt="bad quality, worse quality, low resolution",
136
- num_frames=24,
137
- guidance_scale=2.0,
138
- num_inference_steps=6,
139
- generator=torch.Generator("cpu").manual_seed(0)
140
- )
141
- video_frames.extend(output.frames[0])
142
-
143
- return video_frames
144
-
145
  def text2text_A(user_input):
146
  # 设置API密钥和基础URL
147
  api_key = "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"
@@ -176,8 +148,8 @@ def text2text_A(user_input):
176
 
177
  # 定义文本到音频函数
178
  def text2audio(text_input, duration_seconds):
179
- processor = AutoProcessor.from_pretrained("musicgen-small")
180
- model = MusicgenForConditionalGeneration.from_pretrained("musicgen-small")
181
  inputs = processor(text=[text_input], padding=True, return_tensors="pt")
182
  max_new_tokens = int((duration_seconds / 5) * 256)
183
  audio_values = model.generate(**inputs, max_new_tokens=max_new_tokens)
 
16
 
17
  # 定义图像到文本函数
18
  def img2text(image):
19
+ processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
20
+ model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
21
  inputs = processor(image, return_tensors="pt")
22
  out = model.generate(**inputs)
23
  caption = processor.decode(out[0], skip_special_tokens=True)
 
78
  sentences = re.findall(r'\[\d+\] (.+?)(?:\n|\Z)', input_text)
79
 
80
  # 加载动作适配器和动画扩散管道
81
+ adapter = MotionAdapter.from_pretrained("wangfuyun/AnimateLCM", config_file="wangfuyun/AnimateLCM/AnimateLCM/config.json", torch_dtype=torch.float16)
82
+ pipe = AnimateDiffPipeline.from_pretrained("emilianJR/epiCRealism", motion_adapter=adapter, torch_dtype=torch.float16)
83
  pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config, beta_schedule="linear")
84
 
85
  # 加载LoRA权重
86
+ pipe.load_lora_weights("wangfuyun/AnimateLCM", weight_name="AnimateLCM_sd15_t2v_lora.safetensors", adapter_name="lcm-lora")
87
 
88
  # 设置适配器并启用功能
89
  try:
 
114
 
115
 
116
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  def text2text_A(user_input):
118
  # 设置API密钥和基础URL
119
  api_key = "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"
 
148
 
149
  # 定义文本到音频函数
150
  def text2audio(text_input, duration_seconds):
151
+ processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
152
+ model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
153
  inputs = processor(text=[text_input], padding=True, return_tensors="pt")
154
  max_new_tokens = int((duration_seconds / 5) * 256)
155
  audio_values = model.generate(**inputs, max_new_tokens=max_new_tokens)