reab5555 committed on
Commit
2b50de4
·
verified ·
1 Parent(s): a1ef92e

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +41 -93
utils.py CHANGED
@@ -1,94 +1,42 @@
1
- from nltk import sent_tokenize
2
- from transcribe import format_time
3
-
4
- def frame_to_timecode(frame_num, total_frames, duration):
5
- total_seconds = (frame_num / total_frames) * duration
6
- hours = int(total_seconds // 3600)
7
- minutes = int((total_seconds % 3600) // 60)
8
- seconds = int(total_seconds % 60)
9
- milliseconds = int((total_seconds - int(total_seconds)) * 1000)
10
- return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{milliseconds:03d}"
11
-
12
- def seconds_to_timecode(seconds):
13
- hours = int(seconds // 3600)
14
- minutes = int((seconds % 3600) // 60)
15
- seconds = int(seconds % 60)
16
- return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
17
-
18
- def timecode_to_seconds(timecode):
19
- h, m, s = map(int, timecode.split(':'))
20
- return h * 3600 + m * 60 + s
21
-
22
- def add_timecode_to_image(image, timecode):
23
- from PIL import Image, ImageDraw, ImageFont
24
- import numpy as np
25
-
26
- img_pil = Image.fromarray(image)
27
- draw = ImageDraw.Draw(img_pil)
28
- font = ImageFont.truetype("arial.ttf", 15)
29
- draw.text((10, 10), timecode, (255, 0, 0), font=font)
30
- return np.array(img_pil)
31
-
32
- def flexible_timecode_to_seconds(timecode):
33
- try:
34
- if ',' in timecode:
35
- h, m, s = timecode.replace(',', '.').split(':')
36
- else:
37
- h, m, s = timecode.split(':')
38
- return int(float(h)) * 3600 + int(float(m)) * 60 + float(s)
39
- except ValueError:
40
- print(f"Invalid timecode format: {timecode}")
41
- return 0
42
-
43
- def add_timecode_to_image_body(image, timecode):
44
- from PIL import Image, ImageDraw, ImageFont
45
- import numpy as np
46
-
47
- img_pil = Image.fromarray(image)
48
- draw = ImageDraw.Draw(img_pil)
49
- font = ImageFont.truetype("arial.ttf", 100)
50
- draw.text((10, 10), timecode, (255, 0, 0), font=font)
51
- return np.array(img_pil)
52
-
53
- def parse_transcription(transcription_output, video_duration):
54
- # Remove the "Text Transcription:" prefix if it exists
55
- if transcription_output.startswith("Text Transcription:"):
56
- transcription_output = transcription_output.split("Text Transcription:", 1)[1].strip()
57
-
58
- sentences = sent_tokenize(transcription_output)
59
- total_chars = sum(len(s) for s in sentences)
60
- sentences_with_timecodes = []
61
- current_time = 0
62
-
63
- for sentence in sentences:
64
- sentence_duration = (len(sentence) / total_chars) * video_duration
65
- end_time = current_time + sentence_duration
66
- timecode = format_time(current_time)
67
- sentences_with_timecodes.append((timecode, sentence))
68
- current_time = end_time
69
-
70
- return sentences_with_timecodes
71
-
72
-
73
- def get_sentences_before_anomalies(sentences_with_timecodes, anomaly_timecodes, time_threshold=5):
74
- anomaly_sentences = {}
75
- for anomaly_timecode in anomaly_timecodes:
76
- try:
77
- anomaly_time = flexible_timecode_to_seconds(anomaly_timecode)
78
- relevant_sentences = [
79
- (timecode, sentence) for timecode, sentence in sentences_with_timecodes
80
- if 0 <= anomaly_time - flexible_timecode_to_seconds(timecode) <= time_threshold
81
- ]
82
- if relevant_sentences:
83
- # Use the sentences as the key to avoid duplicates
84
- key = tuple((timecode, sentence) for timecode, sentence in relevant_sentences)
85
- if key not in anomaly_sentences:
86
- anomaly_sentences[key] = anomaly_timecode
87
- except Exception as e:
88
- print(f"Error processing anomaly timecode {anomaly_timecode}: {str(e)}")
89
- continue
90
- return [(timecode, list(sentences)) for sentences, timecode in anomaly_sentences.items()]
91
-
92
- def timecode_to_seconds(timecode):
93
- h, m, s = map(float, timecode.split(':'))
94
  return h * 3600 + m * 60 + s
 
1
+
2
+ def frame_to_timecode(frame_num, total_frames, duration):
3
+ total_seconds = (frame_num / total_frames) * duration
4
+ hours = int(total_seconds // 3600)
5
+ minutes = int((total_seconds % 3600) // 60)
6
+ seconds = int(total_seconds % 60)
7
+ milliseconds = int((total_seconds - int(total_seconds)) * 1000)
8
+ return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{milliseconds:03d}"
9
+
10
+ def seconds_to_timecode(seconds):
11
+ hours = int(seconds // 3600)
12
+ minutes = int((seconds % 3600) // 60)
13
+ seconds = int(seconds % 60)
14
+ return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
15
+
16
+ def timecode_to_seconds(timecode):
17
+ h, m, s = map(int, timecode.split(':'))
18
+ return h * 3600 + m * 60 + s
19
+
20
+ def add_timecode_to_image(image, timecode):
21
+ from PIL import Image, ImageDraw, ImageFont
22
+ import numpy as np
23
+
24
+ img_pil = Image.fromarray(image)
25
+ draw = ImageDraw.Draw(img_pil)
26
+ font = ImageFont.truetype("arial.ttf", 15)
27
+ draw.text((10, 10), timecode, (255, 0, 0), font=font)
28
+ return np.array(img_pil)
29
+
30
+ def add_timecode_to_image_body(image, timecode):
31
+ from PIL import Image, ImageDraw, ImageFont
32
+ import numpy as np
33
+
34
+ img_pil = Image.fromarray(image)
35
+ draw = ImageDraw.Draw(img_pil)
36
+ font = ImageFont.truetype("arial.ttf", 100)
37
+ draw.text((10, 10), timecode, (255, 0, 0), font=font)
38
+ return np.array(img_pil)
39
+
40
+ def timecode_to_seconds(timecode):
41
+ h, m, s = map(float, timecode.split(':'))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  return h * 3600 + m * 60 + s