lalalalalalalalalala committed on
Commit
bab8ee6
·
verified ·
1 Parent(s): 3ee7666

Update run.py

Browse files
Files changed (1) hide show
  1. run.py +23 -25
run.py CHANGED
@@ -35,33 +35,31 @@ def fast_caption(sys_prompt, usr_prompt, temp, top_p, max_tokens, model, key, en
35
  elif video_hf and video_hf_auth:
36
  # Process all videos in the dataset
37
  all_captions = []
38
- with tempfile.NamedTemporaryFile(mode='w+t', delete=True) as temp_parquet_file:
39
- temp_parquet_file.write(hf_hub_download(
40
- repo_id=video_hf,
41
- filename='data/' + str(parquet_index).zfill(6) + '.parquet',
42
- repo_type="dataset",
43
- token=video_hf_auth,
44
- ))
45
- parquet_path = temp_parquet_file.name
46
- print(parquet_path)
47
- parquet_file = pq.ParquetFile(parquet_path)
48
-
49
- for batch in parquet_file.iter_batches(batch_size=1):
50
- df = batch.to_pandas()
51
- video = df['video'][0]
52
 
53
- md5 = hashlib.md5(video).hexdigest()
54
- with tempfile.NamedTemporaryFile(mode='w+t', delete=True) as temp_video_file:
55
- temp_video_file.write(video)
56
- video_path = temp_video_file.name
57
- print(video_path)
58
 
59
- processor = VideoProcessor(frame_format=frame_format, frame_limit=frame_limit)
60
- frames = processor._decode(video_path)
61
- base64_list = processor.to_base64_list(frames)
62
- api = AzureAPI(key=key, endpoint=endpoint, model=model, temp=temp, top_p=top_p, max_tokens=max_tokens)
63
- caption = api.get_caption(sys_prompt, usr_prompt, base64_list)
64
- all_captions.append(caption)
65
 
66
  return "\n\n\n".join(all_captions), f"Processed {len(video_paths)} videos.", None
67
 
 
35
  elif video_hf and video_hf_auth:
36
  # Process all videos in the dataset
37
  all_captions = []
38
+ temp_parquet_file = hf_hub_download(
39
+ repo_id=video_hf,
40
+ filename='data/' + str(parquet_index).zfill(6) + '.parquet',
41
+ repo_type="dataset",
42
+ token=video_hf_auth,
43
+ )
44
+ print(temp_parquet_file)
45
+ parquet_file = pq.ParquetFile(parquet_path)
46
+
47
+ for batch in parquet_file.iter_batches(batch_size=1):
48
+ df = batch.to_pandas()
49
+ video = df['video'][0]
 
 
50
 
51
+ md5 = hashlib.md5(video).hexdigest()
52
+ with tempfile.NamedTemporaryFile(mode='w+t', delete=True) as temp_video_file:
53
+ temp_video_file.write(video)
54
+ video_path = temp_video_file.name
55
+ print(video_path)
56
 
57
+ processor = VideoProcessor(frame_format=frame_format, frame_limit=frame_limit)
58
+ frames = processor._decode(video_path)
59
+ base64_list = processor.to_base64_list(frames)
60
+ api = AzureAPI(key=key, endpoint=endpoint, model=model, temp=temp, top_p=top_p, max_tokens=max_tokens)
61
+ caption = api.get_caption(sys_prompt, usr_prompt, base64_list)
62
+ all_captions.append(caption)
63
 
64
  return "\n\n\n".join(all_captions), f"Processed {len(video_paths)} videos.", None
65