qqwjq1981 committed on
Commit
9c80aa8
·
verified ·
1 Parent(s): aeaf04e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -22
app.py CHANGED
@@ -16,6 +16,8 @@ import json
16
  from nltk.tokenize import sent_tokenize
17
  import logging
18
  from textblob import TextBlob
 
 
19
 
20
  # Configure logging
21
  logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
@@ -30,27 +32,23 @@ def transcribe_video(video_path):
30
  video = VideoFileClip(video_path)
31
  audio_path = "audio.wav"
32
  video.audio.write_audiofile(audio_path)
33
-
34
- # Initialize recognizer class (for recognizing the speech)
35
- recognizer = sr.Recognizer()
36
-
37
- # Use SpeechRecognition to transcribe audio
38
- with sr.AudioFile(audio_path) as source:
39
- audio_text = recognizer.record(source)
40
- transcript = recognizer.recognize_google(audio_text)
41
-
42
- # Split transcript into sentences
43
- sentences = split_into_sentences(transcript)
44
-
45
- # Create a list of timestamps for each sentence
46
- timestamps = []
47
- duration_per_sentence = len(audio_text.frame_data) / len(sentences) / 44100 # Approximate duration per sentence in seconds
48
 
49
- for i, sentence in enumerate(sentences):
50
- start_time = i * duration_per_sentence
51
- timestamps.append({"start": start_time, "text": sentence})
52
 
53
- return timestamps
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
  # Function to get the appropriate translation model based on target language
56
  def get_translation_model(target_language):
@@ -79,7 +77,8 @@ def translate_text(transcription_json, target_language):
79
  translated_json.append({
80
  "start": entry["start"],
81
  "original": original_text,
82
- "translated": translated_text
 
83
  })
84
 
85
  # Return the translated timestamps as a JSON string
@@ -94,10 +93,10 @@ def add_transcript_to_video(video_path, timestamps, output_path):
94
 
95
  for entry in timestamps:
96
  # Create a text clip for each sentence
97
- txt_clip = TextClip(entry["text"], fontsize=24, color='white', bg_color='black', size=video.size)
98
 
99
  # Set the start time and duration for each text clip
100
- txt_clip = txt_clip.set_start(entry["start"]).set_duration(3).set_position(('bottom')).set_opacity(0.7) # Display each sentence for 3 seconds
101
 
102
  # Append the text clip to the list
103
  text_clips.append(txt_clip)
 
16
  from nltk.tokenize import sent_tokenize
17
  import logging
18
  from textblob import TextBlob
19
+ import whisper
20
+
21
 
22
  # Configure logging
23
  logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
 
32
  video = VideoFileClip(video_path)
33
  audio_path = "audio.wav"
34
  video.audio.write_audiofile(audio_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
+ # Load Whisper model
37
+ model = whisper.load_model("base") # Options: tiny, base, small, medium, large
 
38
 
39
+ # Transcribe with Whisper
40
+ result = model.transcribe(audio_path, word_timestamps=True)
41
+
42
+ # Extract timestamps and text
43
+ transcript_with_timestamps = [
44
+ {
45
+ "start": segment["start"],
46
+ "end": segment["end"],
47
+ "text": segment["text"]
48
+ }
49
+ for segment in result["segments"]
50
+ ]
51
+ return transcript_with_timestamps
52
 
53
  # Function to get the appropriate translation model based on target language
54
  def get_translation_model(target_language):
 
77
  translated_json.append({
78
  "start": entry["start"],
79
  "original": original_text,
80
+ "translated": translated_text,
81
+ "end": entry["end"]
82
  })
83
 
84
  # Return the translated timestamps as a JSON string
 
93
 
94
  for entry in timestamps:
95
  # Create a text clip for each sentence
96
+ txt_clip = TextClip(entry["translated"], fontsize=24, color='white', bg_color='black', size=video.size)
97
 
98
  # Set the start time and duration for each text clip
99
+ txt_clip = txt_clip.set_start(entry["start"]).set_duration(entry["end"] - entry["start"]).set_position(('bottom')).set_opacity(0.7) # Display each sentence for 3 seconds
100
 
101
  # Append the text clip to the list
102
  text_clips.append(txt_clip)