Spaces:

weiyi01191
/

DeepOperateAI-Video

Sleeping

App Files Community

weiyi01191 committed on Jun 11

Commit

6ffc9b1

verified ·

1 Parent(s): e68ca85

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -4

app.py CHANGED Viewed

@@ -289,6 +289,13 @@ def generate_prediction(video_path, instruction, gen_subtitles=True, stream=Fals
     if torch.cuda.is_available():
         torch.cuda.empty_cache()  # 清理缓存
         torch.cuda.synchronize()  # 同步GPU操作
     try:
         # 🔧 使用更保守的生成参数避免cuBLAS错误
@@ -314,21 +321,37 @@ def generate_prediction(video_path, instruction, gen_subtitles=True, stream=Fals
                 torch.cuda.empty_cache()
                 torch.cuda.synchronize()
                 gc.collect()
             try:
                 # 🔧 使用更小的参数重试
                 answers = model.generate(
                     prepared_images,
                     prompt,
-                    max_new_tokens=128,  # 进一步减少token数
                     do_sample=False,     # 关闭采样减少计算
-                    lengths=[min(length, 16)],  # 减少长度
                     num_beams=1,
-                    temperature=1.0
                 )
                 return answers[0]
             except Exception as e2:
-                return f"GPU运算错误，请重试。错误信息: {str(e2)}"
         else:
             return f"生成预测时出错: {str(e)}"
     except Exception as e:

     if torch.cuda.is_available():
         torch.cuda.empty_cache()  # 清理缓存
         torch.cuda.synchronize()  # 同步GPU操作
+        # 🚀 H200特定优化
+        gpu_name = torch.cuda.get_device_name(0)
+        if "H200" in gpu_name:
+            # H200额外内存清理
+            gc.collect()
+            torch.cuda.reset_peak_memory_stats()
     try:
         # 🔧 使用更保守的生成参数避免cuBLAS错误
                 torch.cuda.empty_cache()
                 torch.cuda.synchronize()
                 gc.collect()
+                # 🚀 H200特定恢复策略
+                gpu_name = torch.cuda.get_device_name(0)
+                if "H200" in gpu_name:
+                    print("🔧 应用H200特定恢复策略...")
+                    torch.cuda.reset_peak_memory_stats()
+                    # 临时禁用TF32以避免H200精度问题
+                    torch.backends.cuda.matmul.allow_tf32 = False
+                    torch.backends.cudnn.allow_tf32 = False
             try:
                 # 🔧 使用更小的参数重试
                 answers = model.generate(
                     prepared_images,
                     prompt,
+                    max_new_tokens=256,  # 减少token数
                     do_sample=False,     # 关闭采样减少计算
+                    lengths=[min(length, 24)],  # 增加一点长度，但不要太多
                     num_beams=1,
+                    temperature=1.0,
+                    use_cache=False  # H200上禁用缓存
                 )
+                # 🚀 H200恢复TF32设置
+                if torch.cuda.is_available() and "H200" in torch.cuda.get_device_name(0):
+                    torch.backends.cuda.matmul.allow_tf32 = True
+                    torch.backends.cudnn.allow_tf32 = True
                 return answers[0]
             except Exception as e2:
+                return f"GPU运算错误，请重试。H200特定优化已应用。错误信息: {str(e2)}"
         else:
             return f"生成预测时出错: {str(e)}"
     except Exception as e: