File size: 2,655 Bytes
7288748
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a717b8a
9f4877d
a717b8a
7288748
f3212bd
5c20ea7
 
 
a717b8a
d357f9b
 
 
 
 
 
 
 
 
 
 
 
7288748
d357f9b
7288748
d357f9b
 
 
 
 
 
7288748
 
 
2ced026
 
 
 
 
66e1942
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import os
from pathlib import Path
from typing import Any, Optional
from collections import OrderedDict

from pytube import YouTube
import whisper

from transforming.transform import Transform
from video import YoutubeVideo
from utils import accepts_types

class WhisperTransform(Transform):
    """
    Transform a Video object using Whisper model. It's a
    concrete Transform.
    Args:
        model (`str`):
            Size of Whisper model. Can be tiny, base (default), small, medium, and large.
        without_timestamps (`bool`, defaults to `False`):
            Forwarded unchanged to the Whisper model's `transcribe` call as
            its `without_timestamps` option.
    """

    def __init__(self, model: str = "base", without_timestamps: bool = False) -> None:
        # Loading the model happens once per transform instance and is
        # reused for every video passed to `apply`.
        self.model = whisper.load_model(model)
        self.without_timestamps = without_timestamps

    @accepts_types(YoutubeVideo)
    def apply(self, video: YoutubeVideo) -> Optional[YoutubeVideo]:
        """Creates a new video with transcriptions created by Whisper.

        Args:
            video (`YoutubeVideo`):
                Video whose `url` is used to fetch the audio track. Its
                `channel_name`, `url`, `title` and `description` are copied
                to the returned object.

        Returns:
            A new `YoutubeVideo` carrying the full transcription text and a
            list of `OrderedDict` segments (`start`, `end`, `text`), or
            `None` when the video could not be opened, no audio could be
            fetched, or the transcription failed. Failures are reported on
            stdout rather than raised.
        """
        # Create a YouTube object
        try:
            yt = YouTube(video.url)
        except Exception as e:
            print ("Video not available \n")
            print(f"Exception: {e}")
            # Without a YouTube object there is nothing to download; bail out
            # instead of falling through to an unbound `yt`.
            return None

        print(f"Video title and url: {video.title} {video.url}")
        audio_file = self._get_audio_from_video(yt)
        if audio_file is None:
            # The helper already reported why no audio is available.
            return None

        try:
            result = self.model.transcribe(
                audio_file,
                without_timestamps=self.without_timestamps,
            )
        except Exception as e:
            print(f"Audio exception print: {e}")
            return None
        finally:
            # The downloaded audio is only a temporary artifact: remove it
            # whether or not the transcription succeeded so failed runs do
            # not leave files behind in the working directory.
            if os.path.exists(audio_file):
                os.remove(audio_file)

        segments = [
            OrderedDict({'start': seg['start'], 'end': seg['end'], 'text': seg['text']})
            for seg in result['segments']
        ]

        return YoutubeVideo(channel_name=video.channel_name,
                            url=video.url,
                            title=video.title,
                            description=video.description,
                            transcription=result["text"],
                            segments=segments)

    def _get_audio_from_video(self, yt: Any) -> Optional[str]:
        """Download the first audio-only stream of `yt` into the current directory.

        Args:
            yt (`Any`):
                Object exposing `streams.filter(only_audio=True).first()`
                (the `YouTube` instance created in `apply`).

        Returns:
            The path of the downloaded file, renamed to carry an `.mp3`
            extension, or `None` when no audio stream could be obtained.
        """
        # TODO: Add credits
        try:
            stream = yt.streams.filter(only_audio=True).first()
        except Exception as e:
            print(f"StreamingData exception print: {e}")
            return None

        if stream is None:
            # `first()` yields nothing when the filter matches no stream.
            print("StreamingData exception print: no audio-only stream available")
            return None

        out_file = stream.download(output_path=".")
        base, _ = os.path.splitext(out_file)
        new_file = base + ".mp3"
        # Only the extension changes here; the file content is not converted.
        # `os.replace` overwrites a stale file of the same name on every
        # platform, whereas `os.rename` fails on Windows if the target exists.
        os.replace(out_file, new_file)
        return new_file