GPT4o-Azure-Caption-Pixel

Sleeping

lalalalalalalalalala committed on Jun 14, 2024

Commit

98d1f42

verified ·

1 Parent(s): 31c66a7

Update run.py

Files changed (1) hide show

run.py CHANGED Viewed

@@ -8,10 +8,12 @@ def load_hf_dataset(dataset_path, auth_token):
     dataset = load_dataset(dataset_path, token=auth_token)
     video_paths = dataset
     return video_paths
 def fast_caption(sys_prompt, usr_prompt, temp, top_p, max_tokens, model, key, endpoint, video_src, video_hf, video_hf_auth, video_od, video_od_auth, video_gd, video_gd_auth, frame_format, frame_limit):
     if video_src:
         video = video_src
         processor = VideoProcessor(frame_format=frame_format, frame_limit=frame_limit)
@@ -27,11 +29,13 @@ def fast_caption(sys_prompt, usr_prompt, temp, top_p, max_tokens, model, key, en
         caption = api.get_caption(sys_prompt, usr_prompt, base64_list)
         return f"{caption}", f"Using model '{model}' with {len(frames)} frames extracted.", debug_image
     elif video_hf and video_hf_auth:
         # Handle Hugging Face dataset
         video_paths = load_hf_dataset(video_hf, video_hf_auth)
         # Process all videos in the dataset
         all_captions = []
         for video_path in video_paths:
             if video_path.endswith('.mp4'):  # 假设我们只处理.mp4文件
                 processor = VideoProcessor(frame_format=frame_format, frame_limit=frame_limit)
                 frames = processor._decode(video_path)

     dataset = load_dataset(dataset_path, token=auth_token)
     video_paths = dataset
+    print("load done")
     return video_paths
 def fast_caption(sys_prompt, usr_prompt, temp, top_p, max_tokens, model, key, endpoint, video_src, video_hf, video_hf_auth, video_od, video_od_auth, video_gd, video_gd_auth, frame_format, frame_limit):
+    print("begin caption")
     if video_src:
         video = video_src
         processor = VideoProcessor(frame_format=frame_format, frame_limit=frame_limit)
         caption = api.get_caption(sys_prompt, usr_prompt, base64_list)
         return f"{caption}", f"Using model '{model}' with {len(frames)} frames extracted.", debug_image
     elif video_hf and video_hf_auth:
+        print("begin video_hf")
         # Handle Hugging Face dataset
         video_paths = load_hf_dataset(video_hf, video_hf_auth)
         # Process all videos in the dataset
         all_captions = []
         for video_path in video_paths:
+            print("video_path")
             if video_path.endswith('.mp4'):  # 假设我们只处理.mp4文件
                 processor = VideoProcessor(frame_format=frame_format, frame_limit=frame_limit)
                 frames = processor._decode(video_path)