Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -289,6 +289,13 @@ def generate_prediction(video_path, instruction, gen_subtitles=True, stream=Fals
|
|
| 289 |
if torch.cuda.is_available():
|
| 290 |
torch.cuda.empty_cache() # 清理缓存
|
| 291 |
torch.cuda.synchronize() # 同步GPU操作
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 292 |
|
| 293 |
try:
|
| 294 |
# 🔧 使用更保守的生成参数避免cuBLAS错误
|
|
@@ -314,21 +321,37 @@ def generate_prediction(video_path, instruction, gen_subtitles=True, stream=Fals
|
|
| 314 |
torch.cuda.empty_cache()
|
| 315 |
torch.cuda.synchronize()
|
| 316 |
gc.collect()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 317 |
|
| 318 |
try:
|
| 319 |
# 🔧 使用更小的参数重试
|
| 320 |
answers = model.generate(
|
| 321 |
prepared_images,
|
| 322 |
prompt,
|
| 323 |
-
max_new_tokens=
|
| 324 |
do_sample=False, # 关闭采样减少计算
|
| 325 |
-
lengths=[min(length,
|
| 326 |
num_beams=1,
|
| 327 |
-
temperature=1.0
|
|
|
|
| 328 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 329 |
return answers[0]
|
| 330 |
except Exception as e2:
|
| 331 |
-
return f"GPU
|
| 332 |
else:
|
| 333 |
return f"生成预测时出错: {str(e)}"
|
| 334 |
except Exception as e:
|
|
|
|
| 289 |
if torch.cuda.is_available():
|
| 290 |
torch.cuda.empty_cache() # 清理缓存
|
| 291 |
torch.cuda.synchronize() # 同步GPU操作
|
| 292 |
+
|
| 293 |
+
# 🚀 H200特定优化
|
| 294 |
+
gpu_name = torch.cuda.get_device_name(0)
|
| 295 |
+
if "H200" in gpu_name:
|
| 296 |
+
# H200额外内存清理
|
| 297 |
+
gc.collect()
|
| 298 |
+
torch.cuda.reset_peak_memory_stats()
|
| 299 |
|
| 300 |
try:
|
| 301 |
# 🔧 使用更保守的生成参数避免cuBLAS错误
|
|
|
|
| 321 |
torch.cuda.empty_cache()
|
| 322 |
torch.cuda.synchronize()
|
| 323 |
gc.collect()
|
| 324 |
+
|
| 325 |
+
# 🚀 H200特定恢复策略
|
| 326 |
+
gpu_name = torch.cuda.get_device_name(0)
|
| 327 |
+
if "H200" in gpu_name:
|
| 328 |
+
print("🔧 应用H200特定恢复策略...")
|
| 329 |
+
torch.cuda.reset_peak_memory_stats()
|
| 330 |
+
# 临时禁用TF32以避免H200精度问题
|
| 331 |
+
torch.backends.cuda.matmul.allow_tf32 = False
|
| 332 |
+
torch.backends.cudnn.allow_tf32 = False
|
| 333 |
|
| 334 |
try:
|
| 335 |
# 🔧 使用更小的参数重试
|
| 336 |
answers = model.generate(
|
| 337 |
prepared_images,
|
| 338 |
prompt,
|
| 339 |
+
max_new_tokens=256, # 减少token数
|
| 340 |
do_sample=False, # 关闭采样减少计算
|
| 341 |
+
lengths=[min(length, 24)], # 增加一点长度,但不要太多
|
| 342 |
num_beams=1,
|
| 343 |
+
temperature=1.0,
|
| 344 |
+
use_cache=False # H200上禁用缓存
|
| 345 |
)
|
| 346 |
+
|
| 347 |
+
# 🚀 H200恢复TF32设置
|
| 348 |
+
if torch.cuda.is_available() and "H200" in torch.cuda.get_device_name(0):
|
| 349 |
+
torch.backends.cuda.matmul.allow_tf32 = True
|
| 350 |
+
torch.backends.cudnn.allow_tf32 = True
|
| 351 |
+
|
| 352 |
return answers[0]
|
| 353 |
except Exception as e2:
|
| 354 |
+
return f"GPU运算错误,请重试。H200特定优化已应用。错误信息: {str(e2)}"
|
| 355 |
else:
|
| 356 |
return f"生成预测时出错: {str(e)}"
|
| 357 |
except Exception as e:
|