Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,209 +1,293 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
import gradio as gr
|
3 |
-
import google.
|
4 |
-
import mimetypes
|
5 |
from pydub import AudioSegment
|
6 |
|
7 |
-
|
8 |
-
|
9 |
-
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
-
|
|
|
12 |
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
)
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
try:
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
)
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
else:
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
return transcript
|
53 |
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
def extract_audio_from_video(video_file):
    """Export the audio track of *video_file* to ``extracted_audio.mp3``.

    Returns:
        ``(path, None)`` on success, or ``(None, message)`` when decoding
        or exporting raised an exception.
    """
    target = "extracted_audio.mp3"
    try:
        AudioSegment.from_file(video_file).export(target, format="mp3")
    except Exception as exc:
        return None, f"Памылка пры выдзяленні аўдыі з відэафайла: {str(exc)}"
    return target, None
|
78 |
-
|
79 |
-
def process_video(video):
    """Extract the audio from *video* and hand it to ``process_audio``.

    Returns:
        ``(error_message, None)`` if extraction failed, otherwise whatever
        ``process_audio`` returns for the extracted file.
    """
    extracted, failure = extract_audio_from_video(video)
    return (failure, None) if failure else process_audio(extracted)
|
84 |
-
|
85 |
-
def check_audio_length(audio):
    """Return an error message if *audio* is longer than 600 s, else ``""``.

    ``None`` input yields ``""``; any exception while loading or measuring
    the file is turned into an error message instead of propagating.
    """
    if audio is None:
        return ""
    try:
        too_long = AudioSegment.from_file(audio).duration_seconds > 600
    except Exception as exc:
        return f"Памылка пры праверцы аўдыё: {str(exc)}"
    return "Памылка: Аўдыёфайл даўжэй за 10 хвілін." if too_long else ""
|
96 |
-
|
97 |
-
def check_video_length(video):
    """Return an error message if *video* is longer than 600 s, else ``""``.

    ``None`` input yields ``""``; any exception while loading or measuring
    the file is turned into an error message instead of propagating.
    """
    if video is None:
        return ""
    try:
        too_long = AudioSegment.from_file(video).duration_seconds > 600
    except Exception as exc:
        return f"Памылка пры праверцы відэа: {str(exc)}"
    return "Памылка: Відэафайл даўжэй за 10 хвілін." if too_long else ""
|
108 |
-
|
109 |
-
def process_file(audio, video):
    """Validate and process whichever input was supplied (audio wins).

    Returns:
        ``(error_message, None)`` when nothing was supplied or the length
        check failed; otherwise the result of ``process_audio`` /
        ``process_video``.
    """
    if audio is None and video is None:
        return "Няма файла для апрацоўкі.", None

    if audio is not None:
        problem = check_audio_length(audio)
        return (problem, None) if problem else process_audio(audio)

    problem = check_video_length(video)
    return (problem, None) if problem else process_video(video)
|
122 |
-
|
123 |
-
def on_audio_change(audio):
    """React to a change of the audio input.

    Returns:
        A ``gr.update`` for the video input plus the text for the
        transcript box (a length-check error message or ``""``).
    """
    if audio is None:
        return gr.update(interactive=True), ""
    # An audio file was loaded: clear and disable the video input and
    # check the duration.
    message = check_audio_length(audio)
    return gr.update(value=None, interactive=False), message
|
130 |
-
|
131 |
-
def on_video_change(video):
    """React to a change of the video input.

    Returns:
        A ``gr.update`` for the audio input plus the text for the
        transcript box (a length-check error message or ``""``).
    """
    if video is None:
        return gr.update(interactive=True), ""
    # A video file was loaded: clear and disable the audio input and
    # check the duration.
    message = check_video_length(video)
    return gr.update(value=None, interactive=False), message
|
138 |
-
|
139 |
-
def translate_transcript(transcript, target_language):
|
140 |
-
try:
|
141 |
-
prompt_text = (
|
142 |
-
f"перакладзі толькі тэксты субцітраў на {target_language} мову. Астатняя пакінь як ёсць."
|
143 |
-
f"Тэкст:\n{transcript}"
|
144 |
)
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
with
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
with gr.
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
# Поле Транскрыпцыя для паказу памылак будзе агульным
|
185 |
-
transcript_output = gr.Textbox(label="Транскрыпцыя", lines=10)
|
186 |
-
# Пры загрузцы аўдыё або відэа запускаем праверку працягласці і абнаўляем адпаведна поле Транскрыпцыя
|
187 |
-
audio_input.change(fn=on_audio_change, inputs=audio_input, outputs=[video_input, transcript_output])
|
188 |
-
video_input.change(fn=on_video_change, inputs=video_input, outputs=[audio_input, transcript_output])
|
189 |
-
|
190 |
-
btn = gr.Button("Апрацаваць")
|
191 |
-
file_output = gr.File(label="SRT-файл")
|
192 |
-
btn.click(fn=process_file, inputs=[audio_input, video_input], outputs=[transcript_output, file_output])
|
193 |
-
|
194 |
-
gr.Markdown("## Пераклад субцітраў")
|
195 |
-
with gr.Row():
|
196 |
-
language_dropdown = gr.Dropdown(
|
197 |
-
choices=["English", "Беларуская", "Руcкая", "Польская", "Літоўская", "Нямецкая"],
|
198 |
-
label="Выберы мову перакладу", value="English"
|
199 |
)
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
|
|
|
1 |
+
# gemini_srt_generator_improved.py
|
2 |
+
"""A Gradio interface that generates SRT subtitles from audio or video files
|
3 |
+
using the new `google.genai` SDK (Gemini models).
|
4 |
+
|
5 |
+
Key improvements over the previous version
|
6 |
+
-----------------------------------------
|
7 |
+
1. Switched from **google.generativeai** ➜ **google.genai** (new SDK).
|
8 |
+
2. File validation (size / MIME‑type) **before** uploading to Gemini → cheaper &
|
9 |
+
safer.
|
10 |
+
3. Robust *retry* wrapper with exponential back‑off + global request timeout.
|
11 |
+
4. Automatic timestamped history folder (`transcripts/`) so results are never
|
12 |
+
overwritten.
|
13 |
+
5. Cleaner UI: • progress bar (gr.Progress) • automatic scroll to bottom in the
|
14 |
+
live status textbox.
|
15 |
+
6. Configurable model via UI dropdown.
|
16 |
+
7. Minor refactor & type hints.
|
17 |
+
"""
|
18 |
+
from __future__ import annotations
|
19 |
+
|
20 |
+
import json
|
21 |
+
import mimetypes
|
22 |
import os
|
23 |
+
import threading
|
24 |
+
import time
|
25 |
+
import uuid
|
26 |
+
from datetime import datetime
|
27 |
+
from functools import wraps
|
28 |
+
from pathlib import Path
|
29 |
+
from typing import Callable, List, Tuple
|
30 |
+
|
31 |
import gradio as gr
|
32 |
+
import google.genai as genai # NEW SDK ✅
|
|
|
33 |
from pydub import AudioSegment
|
34 |
|
35 |
+
# -----------------------
|
36 |
+
# CONSTANTS & CONFIG
|
37 |
+
# -----------------------
|
38 |
+
# Upload guard: files larger than this many megabytes are rejected.
MAX_FILE_SIZE_MB = 200

# MIME-type prefixes accepted for the audio and the video input respectively.
ALLOWED_AUDIO_PREFIXES = ("audio/",)
ALLOWED_VIDEO_PREFIXES = ("video/",)

# Directory that collects raw model responses and generated subtitle files;
# created at import time if it does not exist yet.
HISTORY_DIR = Path("transcripts")
HISTORY_DIR.mkdir(exist_ok=True)

# Models offered in the UI dropdown.
DEFAULT_MODEL = "gemini-2.5-flash-preview-04-17"
FALLBACK_MODEL = "gemini-2.5-flash"

# Generation settings shared by every request; the model is asked for JSON.
GENERATION_BASE_CONFIG = dict(
    temperature=0.35,
    top_p=0.95,
    top_k=64,
    response_mime_type="application/json",
)
|
53 |
+
|
54 |
+
# -----------------------
|
55 |
+
# Utils
|
56 |
+
# -----------------------
|
57 |
+
|
58 |
+
def retry(retries: int = 3, delay: float = 3.0, backoff: float = 2.0):
    """Return a decorator that retries a call with exponential back-off.

    Args:
        retries: Total number of attempts, the first call included.
            Must be at least 1.
        delay: Seconds to sleep after the first failed attempt.
        backoff: Factor the sleep interval is multiplied by after every
            failed attempt.

    Returns:
        A decorator. The decorated function returns the first successful
        result, or re-raises the exception raised by the final attempt.

    Raises:
        ValueError: If ``retries`` is smaller than 1 (the wrapped function
            would otherwise never be called).
    """
    if retries < 1:
        raise ValueError("retries must be at least 1")

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            pause = delay
            for attempt in range(1, retries + 1):
                try:
                    return func(*args, **kwargs)
                except Exception:  # noqa: BLE001 - retry on any failure
                    if attempt == retries:
                        # Bare ``raise`` keeps the original traceback.
                        raise
                    time.sleep(pause)
                    pause *= backoff

        return wrapper

    return decorator
|
80 |
+
|
81 |
+
|
82 |
+
def seconds_to_timestamp(sec: float) -> str:
    """Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to whole milliseconds *before* it is split into
    fields, so a fraction such as ``1.9996`` carries into the seconds
    (``00:00:02,000``) instead of producing a four-digit millisecond field.
    Negative input is clamped to zero.

    Args:
        sec: Offset in seconds; anything ``float()`` accepts.

    Returns:
        The zero-padded timestamp string.
    """
    total_ms = max(0, int(round(float(sec) * 1000)))
    total_seconds, ms = divmod(total_ms, 1000)
    total_minutes, s = divmod(total_seconds, 60)
    h, m = divmod(total_minutes, 60)
    return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"
|
88 |
+
|
89 |
+
|
90 |
+
# -----------------------
|
91 |
+
# Validation helpers
|
92 |
+
# -----------------------
|
93 |
+
|
94 |
+
def _validate_file(path: str, allowed_prefixes: tuple[str, ...]) -> None:
|
95 |
+
if not path or not os.path.isfile(path):
|
96 |
+
raise ValueError("Файл не знойдзены.")
|
97 |
+
|
98 |
+
size_mb = os.path.getsize(path) / 1024 / 1024
|
99 |
+
if size_mb > MAX_FILE_SIZE_MB:
|
100 |
+
raise ValueError(
|
101 |
+
f"Файл занадта вялікі: {size_mb:.1f} MB > {MAX_FILE_SIZE_MB} MB."
|
102 |
)
|
103 |
+
|
104 |
+
mime, _ = mimetypes.guess_type(path)
|
105 |
+
if not mime or not mime.startswith(allowed_prefixes):
|
106 |
+
raise ValueError(f"Непадтрыманы тып файла: {mime or 'невядомы'}.")
|
107 |
+
|
108 |
+
|
109 |
+
# -----------------------
|
110 |
+
# Gemini helpers
|
111 |
+
# -----------------------
|
112 |
+
|
113 |
+
# Shared ``google.genai`` client. The new SDK has no module-level
# ``configure()`` / ``GenerativeModel`` / ``upload_file``; every call goes
# through a ``genai.Client`` instance, created by ``_configure_genai``.
# NOTE(review): module-wide state - concurrent requests that supply
# different API keys will overwrite each other's client.
_client = None


def _configure_genai(api_key: str) -> None:
    """Create the module-wide Gemini client for *api_key*.

    Args:
        api_key: Gemini API key.

    Raises:
        ValueError: If *api_key* is empty.
    """
    global _client
    if not api_key:
        raise ValueError("Не знойдзены API‑ключ для Gemini (env var `GEMINI_API_KEY`).")
    # ``HttpOptions.timeout`` is given in milliseconds: 90 s per request.
    _client = genai.Client(
        api_key=api_key,
        http_options=genai.types.HttpOptions(timeout=90_000),
    )


def _require_client():
    """Return the configured client or fail with a clear message."""
    if _client is None:
        raise RuntimeError("Gemini client is not configured; call _configure_genai() first.")
    return _client


def _get_model(name: str):
    """Return the handle that ``_transcribe`` receives as ``model``.

    With ``google.genai`` a model is addressed by name on each request,
    so the handle is simply the model name; the shared generation config
    is applied in ``_transcribe``.
    """
    return name


@retry(retries=3)
def _upload_to_gemini(path: str, status_callback: Callable[[str], None]):
    """Upload *path* through the Files API and return the file object.

    The MIME type is guessed from the file name and passed along when
    known. Progress is reported through *status_callback*; the whole call
    is retried by the ``retry`` decorator.
    """
    mime_type, _ = mimetypes.guess_type(path)
    status_callback("📤 Загружаем файл у Gemini …")
    upload_config = {"mime_type": mime_type} if mime_type else None
    file_obj = _require_client().files.upload(file=path, config=upload_config)
    status_callback("✅ Файл загружаны.")
    return file_obj


@retry(retries=3)
def _transcribe(file_obj, model, status_callback: Callable[[str], None]):
    """Send the uploaded file to *model* and return the SDK response.

    Args:
        file_obj: File object returned by ``_upload_to_gemini``.
        model: Handle returned by ``_get_model`` (the model name).
        status_callback: Receives a human-readable status line.
    """
    status_callback("🔍 Пачынаем транскрыпцыю …")
    # NOTE(review): as before, only the file is sent - no instruction or
    # response schema tells the model to produce the list of
    # {"start", "end", "text"} segments the SRT conversion reads. Confirm
    # whether a prompt / system instruction was meant to be included.
    return _require_client().models.generate_content(
        model=model,
        contents=[file_obj],
        config=GENERATION_BASE_CONFIG,
    )
|
137 |
+
|
138 |
+
|
139 |
+
# -----------------------
|
140 |
+
# Core processing
|
141 |
+
# -----------------------
|
142 |
+
|
143 |
+
def transcribe_audio(audio_path: str, model_name: str, status_callback: Callable[[str], None]):
    """Upload *audio_path* to Gemini and return the parsed JSON transcription.

    While the request is in flight a helper thread feeds spinner frames to
    *status_callback*. The raw response text is stored under
    ``HISTORY_DIR`` before it is parsed.

    Args:
        audio_path: Path of the audio file to transcribe.
        model_name: Name of the Gemini model to use.
        status_callback: Receives human-readable status lines.

    Returns:
        The object obtained by JSON-decoding the model's response text.

    Raises:
        ValueError: If the file fails validation.
        RuntimeError: If the model returns an empty response or text that
            is not valid JSON.
    """
    _validate_file(audio_path, ALLOWED_AUDIO_PREFIXES)
    file_obj = _upload_to_gemini(audio_path, status_callback)

    stop_event = threading.Event()

    def _progress() -> None:
        frames = ["⏳", "⏳.", "⏳..", "⏳..."]
        while not stop_event.is_set():
            for frame in frames:
                if stop_event.is_set():
                    break
                status_callback(f"Транскрыпцыя ідзе {frame}")
                # ``wait`` returns as soon as the event is set, so the
                # join below is not held up for a full frame interval.
                stop_event.wait(0.6)

    # Daemon thread: a stuck spinner must never keep the process alive.
    thread = threading.Thread(target=_progress, daemon=True)
    thread.start()

    try:
        model = _get_model(model_name)
        response = _transcribe(file_obj, model, status_callback)
    finally:
        stop_event.set()
        thread.join()

    raw_text = response.text
    if not raw_text:
        raise RuntimeError("❌ Пусты адказ ад мадэлі.")

    # The timestamp only has one-second resolution; the random suffix keeps
    # two requests finishing in the same second from overwriting each other.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    raw_json_path = HISTORY_DIR / f"response_{timestamp}_{uuid.uuid4().hex[:8]}.json"
    raw_json_path.write_text(raw_text, encoding="utf-8")

    status_callback("📥 Апрацоўка транскрыпцыі …")
    try:
        return json.loads(raw_text)
    except json.JSONDecodeError as exc:
        raise RuntimeError(
            f"❌ Адказ мадэлі не з'яўляецца карэктным JSON (захаваны ў {raw_json_path})."
        ) from exc
|
177 |
+
|
178 |
+
|
179 |
+
def transcripts_to_srt(transcripts: List[dict]) -> Tuple[str, Path]:
    """Render transcript segments as SRT text and save it under ``HISTORY_DIR``.

    Args:
        transcripts: Segments, each a mapping with ``"start"`` and
            ``"end"`` (passed to ``seconds_to_timestamp``) and ``"text"``.

    Returns:
        ``(srt_text, path_of_written_file)``.

    Raises:
        KeyError: If a segment lacks one of the three keys.
    """
    cues: list[str] = []
    for idx, seg in enumerate(transcripts, start=1):
        start_ts = seconds_to_timestamp(seg["start"])
        end_ts = seconds_to_timestamp(seg["end"])
        # A blank line terminates an SRT cue, so stray leading/trailing
        # newlines in the text would corrupt the file structure.
        text = str(seg["text"]).strip()
        cues.append(f"{idx}\n{start_ts} --> {end_ts}\n{text}\n")
    content = "\n".join(cues)

    # Second-resolution timestamp plus a random suffix, so results created
    # within the same second do not overwrite each other.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = HISTORY_DIR / f"subtitles_{timestamp}_{uuid.uuid4().hex[:8]}.srt"
    filename.write_text(content, encoding="utf-8")
    return content, filename
|
191 |
+
|
192 |
+
|
193 |
+
def extract_audio_from_video(video_file: str, status_callback: Callable[[str], None]) -> str:
    """Validate *video_file* and export its audio track as an mp3.

    The output is written to the current working directory under a
    random ``extracted_<hex>.mp3`` name.

    Returns:
        Path of the exported mp3 file.

    Raises:
        ValueError: If the video file fails validation.
    """
    _validate_file(video_file, ALLOWED_VIDEO_PREFIXES)
    status_callback("🎞 Вылучаем аўдыё з відэа …")

    soundtrack = AudioSegment.from_file(video_file)
    output_name = "extracted_" + uuid.uuid4().hex + ".mp3"
    soundtrack.export(output_name, format="mp3")

    status_callback("✅ Аўдыё вылучана.")
    return output_name
|
201 |
+
|
202 |
+
|
203 |
+
def process_audio(audio_path: str, model_name: str, status_callback):
    """Transcribe *audio_path* and convert the result to SRT.

    Returns:
        The ``(srt_text, srt_path)`` pair produced by ``transcripts_to_srt``.
    """
    segments = transcribe_audio(audio_path, model_name, status_callback)
    status_callback("📝 Канвертацыя ў SRT …")
    srt_result = transcripts_to_srt(segments)
    return srt_result
|
207 |
+
|
208 |
+
|
209 |
+
def process_video(video_path: str, model_name: str, status_callback):
    """Extract the audio track of *video_path* and process it like an audio file.

    The intermediate mp3 produced by ``extract_audio_from_video`` is
    removed once processing finishes, whether it succeeded or failed.

    Returns:
        Whatever ``process_audio`` returns for the extracted audio.
    """
    audio_path = extract_audio_from_video(video_path, status_callback)
    try:
        return process_audio(audio_path, model_name, status_callback)
    finally:
        # Best-effort cleanup: a failure to delete the temporary file must
        # not mask the real result or exception.
        try:
            os.remove(audio_path)
        except OSError:
            pass
|
212 |
+
|
213 |
+
|
214 |
+
def process_file(audio: str | None, video: str | None, model_name: str, progress: gr.Progress, status_callback):
    """Process whichever input was supplied (audio takes precedence).

    Args:
        audio: Path of an audio file, or ``None``.
        video: Path of a video file, or ``None``.
        model_name: Name of the Gemini model to use.
        progress: Gradio progress tracker, or ``None`` to skip progress
            reporting. It is *called* to report progress; it is not a
            context manager, so it must not be used in a ``with`` block.
        status_callback: Receives human-readable status lines.

    Returns:
        The ``(srt_text, srt_path)`` pair from the processing pipeline.

    Raises:
        ValueError: If neither *audio* nor *video* is given.
    """
    status_callback("🔄 Пачатак апрацоўкі …")
    if progress is not None:
        progress(0.0, desc="🔄 Пачатак апрацоўкі …")

    result: Tuple[str, Path]
    if audio:
        result = process_audio(audio, model_name, status_callback)
    elif video:
        result = process_video(video, model_name, status_callback)
    else:
        raise ValueError("Ні адзін файл не загружаны.")

    if progress is not None:
        progress(1.0, desc="✅ Гатова!")
    status_callback("✅ Гатова!")
    return result
|
|
|
226 |
|
227 |
+
|
228 |
+
# -----------------------
|
229 |
+
# Gradio UI
|
230 |
+
# -----------------------
|
231 |
+
|
232 |
+
def build_ui():
    """Assemble the Gradio Blocks interface and wire up the processing handler.

    Returns:
        The ``gr.Blocks`` application, ready for ``launch()``.
    """
    # Server-side fallback key. It is deliberately NOT used as the initial
    # value of the textbox: component values are sent to every visitor's
    # browser, which would expose the server's secret.
    api_key_default = os.getenv("GEMINI_API_KEY", "")

    with gr.Blocks(title="Gemini SRT Generator (Belarusian Edition)") as demo:
        gr.Markdown(
            "## Загрузіце аўдыё- ці відэафайл — атрымайце субцітры SRT\n"
            "[Суполка беларускага ШІ](https://t.me/belarusai) •\n"
            "[Buy Me A Coffee](https://buymeacoffee.com/tuteishygpt)\n"
        )

        with gr.Row():
            api_key_box = gr.Textbox(
                label="🔑 Gemini API‑key (калі не ўсталяваны як env)",
                type="password",
            )
        with gr.Row():
            model_dropdown = gr.Dropdown(
                [DEFAULT_MODEL, FALLBACK_MODEL],
                value=DEFAULT_MODEL,
                label="🧠 Мадэль Gemini",
            )
        with gr.Row():
            audio_input = gr.Audio(type="filepath", label="🎙 Аўдыёфайл")
            video_input = gr.Video(label="🎥 Відэафайл")
        btn = gr.Button("🚀 Апрацаваць")
        with gr.Row():
            transcript_output = gr.Textbox(
                label="📄 SRT-транскрыпцыя", lines=12, autoscroll=True
            )
            file_output = gr.File(label="⬇️ SRT-файл")
        status_output = gr.Textbox(label="🛠️ Статус", interactive=False, autoscroll=True)

        # ``gr.Progress()`` as a default argument is how Gradio injects the
        # per-request progress tracker; it is not a shared mutable default.
        def wrapped_process(audio, video, api_key, model_name, progress=gr.Progress()):
            """Run the pipeline; return (srt text, srt file, status log)."""
            log: list[str] = []

            def update_status(text: str) -> None:
                # Assigning to ``status_output.value`` inside a handler does
                # not reach the browser, so the lines are collected and
                # returned through the ``status_output`` output instead.
                # Consecutive spinner frames replace each other so they do
                # not flood the log.
                if log and "⏳" in log[-1] and "⏳" in text:
                    log[-1] = text
                else:
                    log.append(text)

            try:
                _configure_genai(api_key or api_key_default)
                content, file_path = process_file(
                    audio, video, model_name, progress, update_status
                )
            except Exception as exc:  # noqa: BLE001 - top-level UI boundary
                update_status(f"❌ Памылка: {exc}")
                return "", None, "\n".join(log)
            return content, str(file_path), "\n".join(log)

        btn.click(
            fn=wrapped_process,
            inputs=[audio_input, video_input, api_key_box, model_dropdown],
            outputs=[transcript_output, file_output, status_output],
        )

    return demo
|
285 |
+
|
286 |
+
|
287 |
+
def main():
    """Build the Gradio application and start serving it."""
    build_ui().launch()


if __name__ == "__main__":
    main()
|