Update app.py

app.py CHANGED
@@ -289,6 +289,13 @@ def generate_prediction(video_path, instruction, gen_subtitles=True, stream=Fals
     if torch.cuda.is_available():
         torch.cuda.empty_cache()  # clear the cache
         torch.cuda.synchronize()  # synchronize GPU operations
+
+        # 🚀 H200-specific optimizations
+        gpu_name = torch.cuda.get_device_name(0)
+        if "H200" in gpu_name:
+            # Extra memory cleanup for the H200
+            gc.collect()
+            torch.cuda.reset_peak_memory_stats()
 
     try:
         # 🔧 Use more conservative generation parameters to avoid cuBLAS errors
@@ -314,21 +321,37 @@ def generate_prediction(video_path, instruction, gen_subtitles=True, stream=Fals
             torch.cuda.empty_cache()
             torch.cuda.synchronize()
             gc.collect()
+
+            # 🚀 H200-specific recovery strategy
+            gpu_name = torch.cuda.get_device_name(0)
+            if "H200" in gpu_name:
+                print("🔧 Applying H200-specific recovery strategy...")
+                torch.cuda.reset_peak_memory_stats()
+                # Temporarily disable TF32 to avoid H200 precision issues
+                torch.backends.cuda.matmul.allow_tf32 = False
+                torch.backends.cudnn.allow_tf32 = False
 
             try:
                 # 🔧 Retry with smaller parameters
                 answers = model.generate(
                     prepared_images,
                     prompt,
-                    max_new_tokens=
+                    max_new_tokens=256,  # reduce the token count
                     do_sample=False,  # turn off sampling to reduce computation
-                    lengths=[min(length,
+                    lengths=[min(length, 24)],  # increase the length a bit, but not too much
                     num_beams=1,
-                    temperature=1.0
+                    temperature=1.0,
+                    use_cache=False  # disable the cache on H200
                 )
+
+                # 🚀 Restore the TF32 settings on H200
+                if torch.cuda.is_available() and "H200" in torch.cuda.get_device_name(0):
+                    torch.backends.cuda.matmul.allow_tf32 = True
+                    torch.backends.cudnn.allow_tf32 = True
+
                 return answers[0]
             except Exception as e2:
-                return f"GPU
+                return f"GPU computation error, please retry. H200-specific optimizations have been applied. Error info: {str(e2)}"
         else:
             return f"Error while generating prediction: {str(e)}"
     except Exception as e:
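A note on the TF32 toggle in the recovery path: the flags are switched off before the retry but only switched back on after a successful generate call, so a retry that raises leaves TF32 disabled for every later request. A minimal sketch of a context-manager variant that restores the previous flags on any exit; the tf32_disabled name is illustrative, and only the torch.backends flags come from the diff:

import contextlib
import torch

@contextlib.contextmanager
def tf32_disabled():
    # Save the current flags so they can be restored exactly, pass or fail.
    prev_matmul = torch.backends.cuda.matmul.allow_tf32
    prev_cudnn = torch.backends.cudnn.allow_tf32
    torch.backends.cuda.matmul.allow_tf32 = False
    torch.backends.cudnn.allow_tf32 = False
    try:
        yield
    finally:
        # Runs even when generate raises, unlike the success-only restore above.
        torch.backends.cuda.matmul.allow_tf32 = prev_matmul
        torch.backends.cudnn.allow_tf32 = prev_cudnn

The retry would then read `with tf32_disabled(): answers = model.generate(...)`, and the explicit restore block after the generate call becomes unnecessary.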
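Taken together, the two hunks implement a two-attempt strategy: a normal attempt, then, after reclaiming GPU state, a retry with cheaper decoding settings. A condensed sketch of that pattern, assuming the model.generate call shape shown in the diff; the first-attempt parameter values are placeholders, since the diff only shows the fallback values:

import gc
import torch

def generate_with_fallback(model, prepared_images, prompt, length):
    try:
        # First attempt: placeholder parameters, not shown in the diff.
        answers = model.generate(prepared_images, prompt,
                                 max_new_tokens=512, do_sample=True,
                                 lengths=[length], num_beams=1)
    except RuntimeError:
        # cuBLAS/OOM-style failure: reclaim GPU memory before retrying.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.synchronize()
        gc.collect()
        # Fallback attempt: the conservative values from the diff.
        answers = model.generate(prepared_images, prompt,
                                 max_new_tokens=256,         # fewer new tokens
                                 do_sample=False,            # greedy decoding
                                 lengths=[min(length, 24)],  # cap the length
                                 num_beams=1,
                                 temperature=1.0,
                                 use_cache=False)            # skip the cache
    return answers[0]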