File size: 3,260 Bytes
270736b
 
 
 
417e147
 
 
270736b
 
 
 
 
 
 
9c68228
270736b
669dcca
 
270736b
 
 
 
 
 
 
 
 
 
417e147
 
 
 
 
 
 
270736b
 
417e147
270736b
 
 
 
417e147
270736b
 
 
 
417e147
270736b
417e147
 
270736b
 
 
 
669dcca
 
270736b
 
669dcca
 
 
 
 
 
270736b
 
417e147
 
669dcca
417e147
 
669dcca
 
 
270736b
669dcca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import time
from fastapi import APIRouter, Depends, HTTPException, status
from faster_whisper import WhisperModel

import os

from libs.convert_to_audio import convert_to_audio
from libs.header_api_auth import get_api_key

# Router for the transcript endpoints: every route registered on it is served
# under the "/get-transcript" prefix and tagged "transcript".
router = APIRouter(prefix="/get-transcript", tags=["transcript"])

# model_size: distil-large-v2
# model_size: distil-large-v3

# api_key: str = Depends(get_api_key)
@router.get("/")
def get_transcript(audio_path: str, model_size: str = "distil-large-v3", api_key: str = Depends(get_api_key)):
    """Transcribe the media at ``audio_path`` and return its text with timings.

    The source is first converted by ``convert_to_audio`` into a temporary
    MP3 under ``./cached/audio`` and then transcribed on CPU (INT8) with
    faster-whisper, forcing English. The temporary MP3 is removed whether the
    request succeeds or fails.

    Args:
        audio_path: Location of the source media, handed to
            ``convert_to_audio``. Surrounding whitespace is ignored.
        model_size: Model identifier passed to ``WhisperModel``.
        api_key: Value resolved by the ``get_api_key`` dependency; it is not
            read inside this function.

    Returns:
        A dict with ``"text"`` (all segment texts concatenated),
        ``"list_sentence"`` (one ``start_time``/``end_time``/``text`` entry
        per segment) and ``"elapsed_time"`` (seconds, rounded to 2 decimals,
        covering conversion plus transcription).

    Raises:
        HTTPException: With status 403 when loading the model or transcribing
            fails. Errors raised by ``convert_to_audio`` propagate unchanged.
    """
    print(f"model>>>: {model_size}")

    output_audio_folder = "./cached/audio"
    os.makedirs(output_audio_folder, exist_ok=True)

    # Strip once so the converter input and the derived file name agree.
    source_path = audio_path.strip()
    # Computed outside the f-string: reusing the same quote character inside
    # an f-string is a SyntaxError on Python versions before 3.12.
    base_name = source_path.split("/")[-1].split(".")[0]
    output_file = f"{output_audio_folder}/{base_name}.mp3"

    started_at = time.time()

    try:
        convert_to_audio(source_path, output_file)

        try:
            model_run = WhisperModel(model_size, device="cpu", compute_type="int8")
            segments, _info = model_run.transcribe(
                output_file,
                beam_size=16,
                language="en",
                condition_on_previous_text=False,
            )
            # faster-whisper returns `segments` as a generator, so the actual
            # decoding happens while iterating. Consume it inside the `try`
            # so decoding errors are reported like any other failure.
            list_sentences = [
                {
                    "start_time": segment.start,
                    "end_time": segment.end,
                    "text": segment.text,
                }
                for segment in segments
            ]
        except Exception as error:
            # NOTE(review): 403 is kept for backward compatibility with
            # existing clients; a 5xx status would describe this better.
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail=f"error>>>: {error}",
            ) from error
    finally:
        # Always drop the temporary MP3, including when conversion itself
        # failed after writing a partial file.
        if os.path.exists(output_file):
            os.remove(output_file)

    text = "".join(sentence["text"] for sentence in list_sentences)
    elapsed_time = time.time() - started_at

    return {
        "text": text,
        "list_sentence": list_sentences,
        "elapsed_time": round(elapsed_time, 2),
    }

#     time.sleep(5)

#     return {
#     "text": " She has a dimble on her left cheek, it adds charm to her facial features. The dimple is a genetic trait that she inherited from her mother. She's always been proud of it. People compliment her on it. She can't help but smile wider.",
#     "list_sentence": [
#         {
#             "start_time": 0.0,
#             "end_time": 8.0,
#             "text": " She has a dimble on her left cheek, it adds charm to her facial features."
#         },
#         {
#             "start_time": 8.0,
#             "end_time": 16.0,
#             "text": " The dimple is a genetic trait that she inherited from her mother."
#         },
#         {
#             "start_time": 16.0,
#             "end_time": 20.0,
#             "text": " She's always been proud of it."
#         },
#         {
#             "start_time": 20.0,
#             "end_time": 24.0,
#             "text": " People compliment her on it."
#         },
#         {
#             "start_time": 24.0,
#             "end_time": 28.0,
#             "text": " She can't help but smile wider."
#         }
#     ]
# }