File size: 4,759 Bytes
6160629
 
a9913d9
d9d75ab
06555da
6160629
c3bc5d7
e64464f
a9913d9
 
 
 
 
 
 
 
265d4cc
 
 
6160629
d9d75ab
265d4cc
a9913d9
265d4cc
6160629
 
3ab57cd
a9913d9
 
a4cbcc8
a9913d9
265d4cc
6160629
a9913d9
 
265d4cc
 
 
a9913d9
265d4cc
 
631160f
a9913d9
 
631160f
265d4cc
3ab57cd
a9913d9
265d4cc
d9d75ab
f2c750c
d9d75ab
f2c750c
d9d75ab
a9913d9
 
265d4cc
 
 
a9913d9
 
631160f
a9913d9
 
6160629
265d4cc
9dab23b
06555da
6c7418c
 
265d4cc
9dab23b
a9913d9
 
265d4cc
 
 
9dab23b
265d4cc
9dab23b
 
a9913d9
265d4cc
 
 
 
 
a9913d9
9dab23b
265d4cc
a9913d9
265d4cc
a9913d9
265d4cc
f2c750c
a9913d9
 
 
f2c750c
a9913d9
 
265d4cc
f7a8b9d
a9913d9
9dab23b
 
2376307
265d4cc
5e7fffb
f7a8b9d
 
a9913d9
265d4cc
a9913d9
 
 
265d4cc
3ab57cd
631160f
f2c750c
 
a9913d9
 
265d4cc
f2c750c
a9913d9
 
f2c750c
 
a9913d9
f2c750c
 
 
265d4cc
f2c750c
d9d75ab
7d946d9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import os
import gradio as gr
from google import genai
import mimetypes
from pydub import AudioSegment

# Read the API key, the two model names and the transcription prompt from
# environment variables. A variable that is not set yields None.
GEMINI_API_KEY = os.environ.get("gemini")
MODEL_NAME_TH = os.environ.get("modTH")
MODEL_NAME = os.environ.get("mod")
PROMPT_TRANSCRIBE = os.environ.get("p")

# Create one shared session and the two model handles used by the functions
# below: model_th for transcription, model_general for timestamp fixing and
# translation.
# NOTE(review): confirm that the installed `google.genai` package really
# exposes `Session` and `GenerativeModel` — TODO verify against the SDK version.
session = genai.Session(api_key=GEMINI_API_KEY)
model_th = genai.GenerativeModel(
    model_name=MODEL_NAME_TH,
    session=session,
)
model_general = genai.GenerativeModel(
    model_name=MODEL_NAME,
    session=session,
)


def transcribe_audio(audio_file: str) -> str:
    """Transcribe an audio file with the transcription model.

    The MIME type is guessed from the file name. The raw file bytes are then
    sent, together with ``PROMPT_TRANSCRIBE``, to ``model_th``.

    Args:
        audio_file: Path to the audio file.

    Returns:
        The stripped text of the model response. On any failure (including a
        file whose guessed MIME type is not ``audio/*``) a message starting
        with "Памылка" is returned instead of raising.
    """
    try:
        mime_type, _ = mimetypes.guess_type(audio_file)
        if not mime_type or not mime_type.startswith("audio"):
            # Must carry the same "Памылка" prefix as every other failure:
            # process_audio detects errors by that prefix, and without it this
            # message would be processed and saved as if it were a transcript.
            return "Памылка: файл не аўдыёфармату."

        with open(audio_file, "rb") as f:
            audio_data = f.read()

        response = model_th.generate_content(
            [PROMPT_TRANSCRIBE, {"mime_type": mime_type, "data": audio_data}]
        )
        return response.text.strip()

    except Exception as e:
        # Top-level boundary for the UI: report the failure as text.
        return f"Памылка: {e}"


def fix_subtitles_format(transcript: str) -> str:
    """Ask the general model to normalise subtitle timestamps.

    The prompt instructs the model to leave the texts untouched and only
    rewrite the time format to ``00:00:01,589``.

    Args:
        transcript: Subtitle text as produced by the transcription step.

    Returns:
        The stripped model reply. This step is best-effort: if the request
        fails or the reply is empty, the original ``transcript`` is returned
        unchanged.
    """
    prompt_fix = (
        "Не змяняй тэксты, толькі выправі фармат часу ў субцітрах на 00:00:01,589:\n"
        f"{transcript}"
    )
    try:
        response_fix = model_general.generate_content(prompt_fix)
        fixed = (response_fix.text or "").strip()
    except Exception:
        # Deliberate fallback: a failed formatting pass must not lose the
        # transcript that was already obtained.
        return transcript
    # An empty reply would otherwise wipe the transcript; keep the original.
    return fixed or transcript


def create_srt(transcript: str, filename: str = "subtitles.srt") -> tuple[str, str]:
    """Write subtitle text to a UTF-8 file.

    Args:
        transcript: Text to store in the file.
        filename: Destination path; defaults to ``subtitles.srt``.

    Returns:
        ``(transcript, filename)`` on success, or ``(error message, "")``
        when writing fails.
    """
    try:
        with open(filename, mode="w", encoding="utf-8") as srt_file:
            srt_file.write(transcript)
    except Exception as write_error:
        return f"Памылка запісу SRT: {write_error}", ""
    return transcript, filename


def process_audio(audio_path: str) -> tuple[str, str]:
    """Transcribe an audio file and save the result as an SRT file.

    Args:
        audio_path: Path to the audio file.

    Returns:
        ``(error message, "")`` when the transcription text starts with
        "Памылка"; otherwise the result of ``create_srt`` applied to the
        timestamp-fixed transcript.
    """
    raw_transcript = transcribe_audio(audio_path)
    transcription_failed = raw_transcript.startswith("Памылка")
    if not transcription_failed:
        return create_srt(fix_subtitles_format(raw_transcript))
    return raw_transcript, ""


def extract_audio_from_video(video_file: str) -> tuple[str, str]:
    """Export the audio of a video file as MP3.

    The file is loaded with ``AudioSegment.from_file`` and exported to the
    relative path ``extracted_audio.mp3``.

    Args:
        video_file: Path to the video file.

    Returns:
        ``(audio path, "")`` on success, or ``("", error message)`` on failure.
    """
    target_path = "extracted_audio.mp3"
    try:
        AudioSegment.from_file(video_file).export(target_path, format="mp3")
    except Exception as exc:
        return "", f"Памылка аўдыё з відэа: {exc}"
    return target_path, ""


def process_video(video_path: str) -> tuple[str, str]:
    """Extract the audio track of a video and run the audio pipeline on it.

    Args:
        video_path: Path to the video file.

    Returns:
        ``(error message, "")`` when audio extraction reports an error;
        otherwise whatever ``process_audio`` returns for the extracted audio.
    """
    extracted_path, extraction_error = extract_audio_from_video(video_path)
    if not extraction_error:
        return process_audio(extracted_path)
    return extraction_error, ""


def process_file(audio_path: str | None, video_path: str | None) -> tuple[str, str]:
    if audio_path:
        return process_audio(audio_path)
    elif video_path:
        return process_video(video_path)
    return "Няма файла для апрацоўкі.", ""


def translate_transcript(transcript: str, target_language: str) -> tuple[str, str]:
    """Translate subtitle texts with the general model and save them as SRT.

    The prompt asks the model to translate only the subtitle texts into
    ``target_language`` and leave everything else unchanged. The reply is
    written to ``translated_subtitles.srt`` via ``create_srt``.

    Args:
        transcript: Subtitle text to translate.
        target_language: Name of the target language, inserted into the prompt.

    Returns:
        The result of ``create_srt`` on success, or ``(message, "")`` when
        there is nothing to translate or the request fails.
    """
    # Nothing to translate: skip the model call and do not write an SRT file.
    if not transcript or not transcript.strip():
        return "Няма тэксту для перакладу.", ""

    prompt_text = (
        f"Перакладзі тэксты субцітраў на {target_language} мову, астатняе не змяняй:\n{transcript}"
    )
    try:
        response = model_general.generate_content(prompt_text)
        translated = response.text.strip()
        return create_srt(translated, "translated_subtitles.srt")
    except Exception as e:
        # Top-level boundary for the UI: report the failure as text.
        return f"Памылка перакладу: {e}", ""


# Build the Gradio UI: a transcription section followed by a translation
# section. Components are created in display order, so statement order matters.
with gr.Blocks() as demo:
    gr.Markdown("# Транскрыпцыя аўдыя (беларуская мова)")
    # Two alternative inputs side by side: an audio file or a video file.
    with gr.Row():
        audio_input = gr.Audio(type="filepath", label="Аўдыёфайл")
        video_input = gr.Video(label="Відэафайл")

    btn = gr.Button("Апрацаваць")
    transcript_output = gr.Textbox(label="Транскрыпцыя", lines=10)
    file_output = gr.File(label="SRT-файл")

    # process_file returns (text, file path), mapped onto the two outputs.
    btn.click(
        fn=process_file,
        inputs=[audio_input, video_input],
        outputs=[transcript_output, file_output],
    )

    gr.Markdown("## Пераклад субцітраў")
    with gr.Row():
        language_dropdown = gr.Dropdown(
            ["English", "Руcкая", "Польская", "Літоўская", "Нямецкая"],
            label="Мова перакладу",
            value="English",
        )
        translate_btn = gr.Button("Перакласці")

    translation_output = gr.Textbox(label="Пераклад", lines=10)
    translation_file_output = gr.File(label="Translated SRT-файл")

    # The translation reads whatever is currently in the transcript textbox,
    # together with the selected language.
    translate_btn.click(
        fn=translate_transcript,
        inputs=[transcript_output, language_dropdown],
        outputs=[translation_output, translation_file_output],
    )

# Start the app as soon as the module is executed.
demo.launch()