qqwjq1981 committed on
Commit
d5c151e
·
verified ·
1 Parent(s): 0a3a2c9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -1
app.py CHANGED
@@ -15,11 +15,16 @@ import speech_recognition as sr
15
  import json
16
  from nltk.tokenize import sent_tokenize
17
  import logging
 
18
 
19
  # Configure logging
20
  logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
21
  logger = logging.getLogger(__name__)
22
 
 
 
 
 
23
  def transcribe_video(video_path):
24
  # Load the video file and extract audio
25
  video = VideoFileClip(video_path)
@@ -35,7 +40,7 @@ def transcribe_video(video_path):
35
  transcript = recognizer.recognize_google(audio_text)
36
 
37
  # Split transcript into sentences
38
- sentences = sent_tokenize(transcript)
39
 
40
  # Create a list of timestamps for each sentence
41
  timestamps = []
 
15
  import json
16
  from nltk.tokenize import sent_tokenize
17
  import logging
18
+ from textblob import TextBlob
19
 
20
  # Configure logging
21
  logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
22
  logger = logging.getLogger(__name__)
23
 
24
+ def split_into_sentences(text):
25
+ blob = TextBlob(text)
26
+ return [str(sentence) for sentence in blob.sentences]
27
+
28
  def transcribe_video(video_path):
29
  # Load the video file and extract audio
30
  video = VideoFileClip(video_path)
 
40
  transcript = recognizer.recognize_google(audio_text)
41
 
42
  # Split transcript into sentences
43
+ sentences = split_into_sentences(transcript)
44
 
45
  # Create a list of timestamps for each sentence
46
  timestamps = []