Update app.py
app.py CHANGED
@@ -275,16 +275,52 @@ def generate_prediction(video_path, instruction, gen_subtitles=True, stream=False
     # Set the random seed
     setup_seeds(seed)
 
+    # 🔧 GPU memory optimization and cuBLAS error handling
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()   # free cached memory
+        torch.cuda.synchronize()   # wait for pending GPU operations
+
     try:
+        # 🔧 Use more conservative generation parameters to avoid cuBLAS errors
         answers = model.generate(
             prepared_images,
             prompt,
-            max_new_tokens=args.max_new_tokens if args else
+            max_new_tokens=min(args.max_new_tokens if args else 256, 256),  # cap the number of new tokens
             do_sample=True,
             lengths=[length],
-            num_beams=1
+            num_beams=1,              # keep beam=1 to reduce computation
+            temperature=0.8,          # add a temperature parameter
+            top_p=0.9,                # add a top_p parameter
+            repetition_penalty=1.1    # discourage repetition
         )
         return answers[0]
+    except RuntimeError as e:
+        if "cublasLt" in str(e) or "cuBLAS" in str(e):
+            # 🚨 Special handling for cuBLAS errors
+            print(f"⚠️ cuBLAS error detected, falling back to a degraded configuration: {e}")
+
+            # Force-release GPU memory
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+                torch.cuda.synchronize()
+                gc.collect()
+
+            try:
+                # 🔧 Retry with smaller parameters
+                answers = model.generate(
+                    prepared_images,
+                    prompt,
+                    max_new_tokens=128,         # reduce the token budget further
+                    do_sample=False,            # disable sampling to reduce computation
+                    lengths=[min(length, 16)],  # reduce the length
+                    num_beams=1,
+                    temperature=1.0
+                )
+                return answers[0]
+            except Exception as e2:
+                return f"GPU computation error, please retry. Error: {str(e2)}"
+        else:
+            return f"Error while generating the prediction: {str(e)}"
     except Exception as e:
         return f"Error while generating the prediction: {str(e)}"
 
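For reference, here is a minimal, self-contained sketch of the recovery pattern this change introduces: attempt generation once, and if a cuBLAS-related RuntimeError is raised, free GPU memory and retry a single time with a smaller, deterministic configuration. The helper name generate_with_cublas_fallback, its keyword arguments, and the fake_generate smoke test are illustrative assumptions for this sketch, not code from app.py.

import gc

import torch


def generate_with_cublas_fallback(generate_fn, **gen_kwargs):
    # Call generate_fn(**gen_kwargs); on a cuBLAS-related RuntimeError, free GPU
    # memory and retry once with a smaller, deterministic configuration.
    try:
        return generate_fn(**gen_kwargs)
    except RuntimeError as err:
        if "cublas" not in str(err).lower():
            raise  # not a cuBLAS problem, let the caller handle it
        # Release as much GPU memory as possible before the retry.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.synchronize()
        gc.collect()
        retry_kwargs = dict(gen_kwargs)
        retry_kwargs["max_new_tokens"] = min(gen_kwargs.get("max_new_tokens", 256), 128)
        retry_kwargs["do_sample"] = False
        return generate_fn(**retry_kwargs)


if __name__ == "__main__":
    # Smoke test with a fake generator that fails once with a cuBLAS-style error.
    calls = {"count": 0}

    def fake_generate(max_new_tokens=256, do_sample=True):
        calls["count"] += 1
        if calls["count"] == 1:
            raise RuntimeError("CUBLAS_STATUS_NOT_INITIALIZED when calling cublasLtMatmul")
        return f"ok (max_new_tokens={max_new_tokens}, do_sample={do_sample})"

    print(generate_with_cublas_fallback(fake_generate, max_new_tokens=256, do_sample=True))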