|
from nltk import sent_tokenize
|
|
from transcribe import format_time
|
|
|
|
def frame_to_timecode(frame_num, total_frames, duration):
    """Convert a frame index into an ``HH:MM:SS.mmm`` timecode string.

    The frame's position in time is computed as the fraction
    ``frame_num / total_frames`` of ``duration``.

    Args:
        frame_num: Index of the frame.
        total_frames: Total number of frames; used as the divisor.
        duration: Total duration in seconds.

    Returns:
        The timecode formatted as ``HH:MM:SS.mmm``.

    Raises:
        ZeroDivisionError: If ``total_frames`` is 0.
    """
    total_seconds = (frame_num / total_frames) * duration

    # Round to whole milliseconds once, then split with integer arithmetic.
    # Truncating the fractional part separately loses a millisecond to float
    # error (e.g. 1.001 s is stored as 1.000999..., which would print ".000").
    total_ms = int(round(total_seconds * 1000))

    whole_seconds, milliseconds = divmod(total_ms, 1000)
    whole_minutes, seconds = divmod(whole_seconds, 60)
    hours, minutes = divmod(whole_minutes, 60)

    return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{milliseconds:03d}"
|
|
|
|
def seconds_to_timecode(seconds):
    """Format a number of seconds as an ``HH:MM:SS`` string.

    Each field is truncated to an integer, so any fractional part of
    ``seconds`` is dropped rather than rounded.

    Args:
        seconds: Elapsed time in seconds (int or float).

    Returns:
        The zero-padded ``HH:MM:SS`` timecode.
    """
    # Hours, minutes and seconds fields, in display order.
    fields = (
        seconds // 3600,
        (seconds % 3600) // 60,
        seconds % 60,
    )
    return ":".join(f"{int(field):02d}" for field in fields)
|
|
|
|
def timecode_to_seconds(timecode):
    """Convert an ``H:M:S`` timecode string into a number of seconds.

    Fields are parsed as floats so fractional seconds such as
    ``"00:00:01.5"`` are accepted instead of raising ``ValueError``.

    NOTE: this module defines ``timecode_to_seconds`` a second time further
    down; that later definition is the one bound to the name after import.
    This one is kept in agreement with it.

    Args:
        timecode: String with exactly three ``:``-separated numeric fields.

    Returns:
        Total seconds as a float.

    Raises:
        ValueError: If there are not exactly three fields or a field is not
            numeric.
    """
    hours, minutes, secs = (float(part) for part in timecode.split(':'))
    return hours * 3600 + minutes * 60 + secs
|
|
|
|
def add_timecode_to_image(image, timecode):
    """Draw ``timecode`` onto ``image`` in a small font and return the result.

    The text is drawn at pixel position (10, 10) with color (255, 0, 0) at
    font size 15.

    Args:
        image: Array accepted by ``PIL.Image.fromarray`` (presumably a numpy
            image array; confirm against callers).
        timecode: Text to draw.

    Returns:
        A numpy array built from the annotated PIL image.
    """
    from PIL import Image, ImageDraw, ImageFont
    import numpy as np

    font_size = 15

    img_pil = Image.fromarray(image)
    draw = ImageDraw.Draw(img_pil)

    try:
        font = ImageFont.truetype("arial.ttf", font_size)
    except OSError:
        # Arial is not installed on many systems (e.g. typical Linux hosts);
        # fall back to Pillow's bundled font instead of failing.
        try:
            font = ImageFont.load_default(size=font_size)
        except TypeError:
            # Older Pillow releases do not accept a size for the default font.
            font = ImageFont.load_default()

    draw.text((10, 10), timecode, (255, 0, 0), font=font)
    return np.array(img_pil)
|
|
|
|
def flexible_timecode_to_seconds(timecode):
    """Parse an ``H:M:S`` timecode, accepting ``,`` or ``.`` as decimal mark.

    Hours and minutes are truncated to whole numbers; the seconds field keeps
    its fractional part.

    Args:
        timecode: String with three ``:``-separated fields, e.g.
            ``"00:01:02,500"`` or ``"00:01:02.500"``.

    Returns:
        Total seconds, or ``0`` (after printing a message) when parsing
        raises ``ValueError``.
    """
    try:
        # Swap the decimal comma for a dot only when one is present.
        normalized = timecode.replace(',', '.') if ',' in timecode else timecode
        hours_txt, minutes_txt, seconds_txt = normalized.split(':')
        whole_hours = int(float(hours_txt))
        whole_minutes = int(float(minutes_txt))
        return whole_hours * 3600 + whole_minutes * 60 + float(seconds_txt)
    except ValueError:
        print(f"Invalid timecode format: {timecode}")
        return 0
|
|
|
|
def add_timecode_to_image_body(image, timecode):
    """Draw ``timecode`` onto ``image`` in a large font and return the result.

    The text is drawn at pixel position (10, 10) with color (255, 0, 0) at
    font size 100.

    Args:
        image: Array accepted by ``PIL.Image.fromarray`` (presumably a numpy
            image array; confirm against callers).
        timecode: Text to draw.

    Returns:
        A numpy array built from the annotated PIL image.
    """
    from PIL import Image, ImageDraw, ImageFont
    import numpy as np

    font_size = 100

    img_pil = Image.fromarray(image)
    draw = ImageDraw.Draw(img_pil)

    try:
        font = ImageFont.truetype("arial.ttf", font_size)
    except OSError:
        # Arial is not installed on many systems (e.g. typical Linux hosts);
        # fall back to Pillow's bundled font instead of failing.
        try:
            font = ImageFont.load_default(size=font_size)
        except TypeError:
            # Older Pillow releases do not accept a size for the default font,
            # so the text will be small there.
            font = ImageFont.load_default()

    draw.text((10, 10), timecode, (255, 0, 0), font=font)
    return np.array(img_pil)
|
|
|
|
def parse_transcription(transcription_output, video_duration):
    """Split a transcription into sentences with estimated start timecodes.

    A leading ``"Text Transcription:"`` marker is removed if present. Each
    sentence is given a share of ``video_duration`` proportional to its
    character count, and its start time is the running total of the shares
    before it, rendered through ``format_time``.

    Args:
        transcription_output: Raw transcription text.
        video_duration: Total duration to distribute across the sentences.

    Returns:
        A list of ``(timecode, sentence)`` tuples in sentence order.
    """
    marker = "Text Transcription:"
    if transcription_output.startswith(marker):
        transcription_output = transcription_output.split(marker, 1)[1].strip()

    sentences = sent_tokenize(transcription_output)
    total_chars = sum(map(len, sentences))

    timed_sentences = []
    elapsed = 0
    for sentence in sentences:
        # Share of the duration is proportional to the sentence's length.
        share = (len(sentence) / total_chars) * video_duration
        start = elapsed
        elapsed = start + share
        timed_sentences.append((format_time(start), sentence))

    return timed_sentences
|
|
|
|
|
|
def get_sentences_before_anomalies(sentences_with_timecodes, anomaly_timecodes, time_threshold=5):
    """Collect the sentences that start shortly before each anomaly.

    For every anomaly timecode, the sentences whose start time is at most
    ``time_threshold`` seconds before the anomaly (and not after it) are
    gathered. Anomalies that select an identical group of sentences are
    reported once, under the first anomaly timecode that produced the group.

    Args:
        sentences_with_timecodes: Iterable of ``(timecode, sentence)`` pairs.
        anomaly_timecodes: Iterable of anomaly timecode strings.
        time_threshold: Width in seconds of the window before each anomaly.

    Returns:
        A list of ``(anomaly_timecode, [(timecode, sentence), ...])`` tuples.
        Anomalies whose processing raises are reported via ``print`` and
        skipped.
    """
    first_anomaly_by_group = {}

    for anomaly_timecode in anomaly_timecodes:
        try:
            anomaly_time = flexible_timecode_to_seconds(anomaly_timecode)
            group = tuple(
                (timecode, sentence)
                for timecode, sentence in sentences_with_timecodes
                if 0 <= anomaly_time - flexible_timecode_to_seconds(timecode) <= time_threshold
            )
            if group:
                # Keep only the first anomaly seen for a given sentence group.
                first_anomaly_by_group.setdefault(group, anomaly_timecode)
        except Exception as e:
            print(f"Error processing anomaly timecode {anomaly_timecode}: {str(e)}")

    return [
        (timecode, list(sentences))
        for sentences, timecode in first_anomaly_by_group.items()
    ]
|
|
|
|
def timecode_to_seconds(timecode):
    """Convert an ``H:M:S`` timecode string into seconds.

    All three fields are parsed as floats, so fractional values are accepted.

    Args:
        timecode: String with exactly three ``:``-separated numeric fields.

    Returns:
        Total seconds as a float.

    Raises:
        ValueError: If there are not exactly three fields or a field is not
            numeric.
    """
    hours, minutes, secs = (float(part) for part in timecode.split(':'))
    return hours * 3600 + minutes * 60 + secs