"""Timecode conversion, frame annotation and transcript/anomaly alignment helpers."""

from nltk import sent_tokenize

from transcribe import format_time


def frame_to_timecode(frame_num, total_frames, duration):
    """Return the ``HH:MM:SS.mmm`` timecode of a frame.

    The frame's position is interpolated linearly:
    ``frame_num / total_frames * duration``.

    Args:
        frame_num: Index of the frame.
        total_frames: Number of frames in the clip.
        duration: Clip duration in seconds.

    Returns:
        A zero-padded ``HH:MM:SS.mmm`` string. Milliseconds are truncated,
        not rounded.

    Raises:
        ZeroDivisionError: If ``total_frames`` is 0.
    """
    total_seconds = (frame_num / total_frames) * duration
    hours = int(total_seconds // 3600)
    minutes = int((total_seconds % 3600) // 60)
    seconds = int(total_seconds % 60)
    milliseconds = int((total_seconds - int(total_seconds)) * 1000)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{milliseconds:03d}"


def seconds_to_timecode(seconds):
    """Return ``seconds`` formatted as ``HH:MM:SS`` (fractional part dropped)."""
    hours = int(seconds // 3600)
    minutes = int((seconds % 3600) // 60)
    whole_seconds = int(seconds % 60)
    return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d}"


def timecode_to_seconds(timecode):
    """Convert an ``H:M:S`` timecode to seconds as a float.

    Every field is parsed with ``float``, so ``"00:01:02.5"`` is accepted.
    This module used to define this function twice (an int-parsing version
    followed by a float-parsing one); the later definition was the one in
    effect after import, and it is the one kept here.

    Raises:
        ValueError: If the string does not have exactly three
            colon-separated numeric fields.
    """
    h, m, s = map(float, timecode.split(':'))
    return h * 3600 + m * 60 + s


def _parse_timecode(timecode):
    """Strictly parse ``H:M:S`` (``.`` or ``,`` decimal mark) into seconds.

    Hours and minutes are truncated to integers; seconds keep their
    fraction. Raises ValueError on malformed input.
    """
    h, m, s = timecode.replace(',', '.').split(':')
    return int(float(h)) * 3600 + int(float(m)) * 60 + float(s)


def flexible_timecode_to_seconds(timecode):
    """Convert an ``H:M:S`` timecode to seconds, tolerating a comma decimal mark.

    Returns:
        The number of seconds as a float, or ``0`` (after printing a
        message) when the timecode cannot be parsed.
    """
    try:
        return _parse_timecode(timecode)
    except ValueError:
        print(f"Invalid timecode format: {timecode}")
        return 0


def _load_font(size):
    """Load Arial at ``size``, falling back to Pillow's built-in font."""
    from PIL import ImageFont

    try:
        return ImageFont.truetype("arial.ttf", size)
    except OSError:
        # Arial is not installed on many systems; degrade to the bundled
        # default font instead of failing the whole annotation step.
        try:
            return ImageFont.load_default(size)
        except TypeError:
            # Older Pillow releases do not accept a size argument.
            return ImageFont.load_default()


def _draw_timecode(image, timecode, font_size):
    """Return a copy of ``image`` with ``timecode`` drawn at (10, 10)."""
    from PIL import Image, ImageDraw
    import numpy as np

    img_pil = Image.fromarray(image)
    draw = ImageDraw.Draw(img_pil)
    # The fill is an RGB triple (red) - assumes a colour image; TODO confirm
    # against callers.
    draw.text((10, 10), timecode, (255, 0, 0), font=_load_font(font_size))
    return np.array(img_pil)


def add_timecode_to_image(image, timecode, font_size=15):
    """Stamp ``timecode`` in small text on ``image``.

    Args:
        image: Array accepted by ``PIL.Image.fromarray``.
        timecode: Text to draw.
        font_size: Font size passed to Pillow (default 15).

    Returns:
        A new numpy array containing the annotated image.
    """
    return _draw_timecode(image, timecode, font_size)


def add_timecode_to_image_body(image, timecode, font_size=100):
    """Stamp ``timecode`` in large text on ``image``.

    Same as ``add_timecode_to_image`` but with a default font size of 100.
    """
    return _draw_timecode(image, timecode, font_size)


def parse_transcription(transcription_output, video_duration):
    """Split a transcript into sentences with estimated start timecodes.

    Each sentence is assigned a share of ``video_duration`` proportional to
    its character count, and start times are accumulated in order. This is
    an estimate, not a real alignment.

    Args:
        transcription_output: Transcript text, optionally prefixed with
            ``"Text Transcription:"``.
        video_duration: Total duration in seconds.

    Returns:
        A list of ``(timecode, sentence)`` tuples, where ``timecode`` is
        whatever ``transcribe.format_time`` returns for the start time.
    """
    prefix = "Text Transcription:"
    # Remove the "Text Transcription:" prefix if it exists
    if transcription_output.startswith(prefix):
        transcription_output = transcription_output[len(prefix):].strip()

    sentences = sent_tokenize(transcription_output)
    total_chars = sum(len(s) for s in sentences)

    sentences_with_timecodes = []
    current_time = 0
    for sentence in sentences:
        sentences_with_timecodes.append((format_time(current_time), sentence))
        current_time += (len(sentence) / total_chars) * video_duration
    return sentences_with_timecodes


def get_sentences_before_anomalies(sentences_with_timecodes, anomaly_timecodes, time_threshold=5):
    """Collect the sentences spoken shortly before each anomaly.

    A sentence is relevant to an anomaly when it starts no later than the
    anomaly and at most ``time_threshold`` seconds before it.

    Args:
        sentences_with_timecodes: Iterable of ``(timecode, sentence)`` pairs.
        anomaly_timecodes: Iterable of ``H:M:S`` timecode strings.
        time_threshold: Look-back window in seconds.

    Returns:
        A list of ``(anomaly_timecode, [(timecode, sentence), ...])``. When
        several anomalies select the same sentences, only the first anomaly
        is reported. Anomalies that match no sentence, or that cannot be
        processed, are omitted.
    """
    anomaly_sentences = {}
    for anomaly_timecode in anomaly_timecodes:
        try:
            # Parse strictly: the lenient parser would turn a malformed
            # anomaly timecode into 0 and wrongly match the first sentence.
            anomaly_time = _parse_timecode(anomaly_timecode)
            # Use the sentences as the key to avoid duplicates
            key = tuple(
                (timecode, sentence)
                for timecode, sentence in sentences_with_timecodes
                if 0 <= anomaly_time - flexible_timecode_to_seconds(timecode) <= time_threshold
            )
            if key and key not in anomaly_sentences:
                anomaly_sentences[key] = anomaly_timecode
        except Exception as e:  # best effort: one bad entry must not abort the rest
            print(f"Error processing anomaly timecode {anomaly_timecode}: {e}")
            continue

    return [
        (anomaly_timecode, list(sentences))
        for sentences, anomaly_timecode in anomaly_sentences.items()
    ]