archivartaunik committed on
Commit
9fbf18f
·
verified ·
1 Parent(s): 2b112ac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +281 -197
app.py CHANGED
@@ -1,209 +1,293 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
 
 
 
 
 
 
 
 
2
  import gradio as gr
3
- import google.generativeai as genai
4
- import mimetypes
5
  from pydub import AudioSegment
6
 
7
# All configuration is read from environment variables.
GEMINI_API_KEY = os.getenv("gembeh")  # API key handed to genai.configure() below
MODEL_NAME_TH = os.getenv("modTH")  # model name used by transcribe_audio()
MODEL_NAME = os.getenv("mod")  # model name used by fix_subtitles_format() and translate_transcript()

# Import-time side effect: configures the SDK globally with the key above.
genai.configure(api_key=GEMINI_API_KEY)
 
12
 
13
def transcribe_audio(audio_file):
    """Transcribe *audio_file* with the model named by ``MODEL_NAME_TH``.

    The prompt is read from the ``p`` environment variable and sent to the
    model together with the raw file bytes and the guessed MIME type.

    Returns:
        The stripped model text on success.  Every failure is returned as a
        string that starts with "Памылка", because ``process_audio`` uses
        that prefix to tell errors apart from real transcripts.
    """
    try:
        mime_type, _ = mimetypes.guess_type(audio_file)
        if mime_type is None:
            return "Памылка: немагчыма вызначыць тып файла. Падтрымліваюцца толькі аўдыяфайлы."
        with open(audio_file, "rb") as f:
            audio_data = f.read()
        # The prompt text comes from a secret (environment variable).
        prompt_text = os.getenv("p")
        model = genai.GenerativeModel(MODEL_NAME_TH)
        response = model.generate_content(
            [prompt_text, {"mime_type": mime_type, "data": audio_data}]
        )
        if not response.text:
            return "Памылка: не атрымалася транскрыбаваць аўдыя. Магчыма, памылка з API."
        return response.text.strip()
    except FileNotFoundError:
        return "Памылка: Файл не знойдзены."
    except Exception as e:
        # `genai.APIError` is looked up defensively: naming a missing attribute
        # directly in an `except` clause would raise AttributeError and hide
        # the real failure.
        if isinstance(e, getattr(genai, "APIError", ())):
            return f"Памылка API: {str(e)}"
        return f"Памылка: нечаканая памылка: {str(e)}"
37
-
38
def fix_subtitles_format(transcript):
    """Ask the ``MODEL_NAME`` model to normalise subtitle timestamps.

    This is a best-effort step: if the request fails for any reason, or the
    model returns no text, the input *transcript* is returned unchanged.
    """
    try:
        request = (
            f"Не змяняй тэксты, выправі толькі часовы фармат у субцітрах на правільны, вось прыклад 00:00:01,589 \n"
            f" У адказ напішы толькі субцітры: {transcript}"
        )
        reply = genai.GenerativeModel(MODEL_NAME).generate_content(request)
        cleaned = reply.text.strip() if reply.text else transcript
    except Exception:
        # Deliberate fallback: keep the unfixed transcript rather than fail.
        return transcript
    return cleaned
53
 
54
def create_srt(transcript, filename="subtitles.srt"):
    """Write *transcript* to *filename* as UTF-8 text.

    Returns:
        ``(transcript, filename)`` on success, or ``(error message, None)``
        if the file could not be written.
    """
    try:
        srt_file = open(filename, "w", encoding="utf-8")
        with srt_file:
            srt_file.write(transcript)
    except Exception as exc:
        return f"Памылка пры запісе SRT-файла: {str(exc)}", None
    return transcript, filename
61
-
62
def process_audio(audio):
    """Transcribe *audio*, fix the timestamp format and save the result as SRT.

    Returns ``(text, srt_path)``; when transcription reports an error (a
    string starting with "Памылка") that message is returned with ``None``.
    """
    raw = transcribe_audio(audio)
    if not raw.startswith("Памылка"):
        text, srt_file = create_srt(fix_subtitles_format(raw))
        return text, srt_file
    return raw, None
69
-
70
def extract_audio_from_video(video_file):
    """Export the audio of *video_file* to ``extracted_audio.mp3``.

    Returns ``(audio_path, None)`` on success or ``(None, error message)``
    when loading or exporting fails.
    """
    target = "extracted_audio.mp3"
    try:
        AudioSegment.from_file(video_file).export(target, format="mp3")
    except Exception as exc:
        return None, f"Памылка пры выдзяленні аўдыі з відэафайла: {str(exc)}"
    return target, None
78
-
79
def process_video(video):
    """Extract the audio track of *video* and run it through ``process_audio``.

    Returns ``(error message, None)`` if the extraction step fails.
    """
    audio_path, error = extract_audio_from_video(video)
    return (error, None) if error else process_audio(audio_path)
84
-
85
def check_audio_length(audio):
    """Return an error message if *audio* is longer than 600 seconds.

    Returns an empty string when *audio* is ``None`` or short enough, and a
    message describing the failure if the file cannot be read.
    """
    if audio is None:
        return ""
    try:
        too_long = AudioSegment.from_file(audio).duration_seconds > 600
    except Exception as exc:
        return f"Памылка пры праверцы аўдыё: {str(exc)}"
    return "Памылка: Аўдыёфайл даўжэй за 10 хвілін." if too_long else ""
96
-
97
def check_video_length(video):
    """Return an error message if *video* is longer than 600 seconds.

    Returns an empty string when *video* is ``None`` or short enough, and a
    message describing the failure if the file cannot be read.
    """
    if video is None:
        return ""
    try:
        too_long = AudioSegment.from_file(video).duration_seconds > 600
    except Exception as exc:
        return f"Памылка пры праверцы відэа: {str(exc)}"
    return "Памылка: Відэафайл даўжэй за 10 хвілін." if too_long else ""
108
-
109
def process_file(audio, video):
    """Process whichever input was supplied, preferring *audio* over *video*.

    The matching length check runs first; its error message, if any, is
    returned as ``(message, None)`` without processing the file.
    """
    if audio is not None:
        checker, handler, source = check_audio_length, process_audio, audio
    elif video is not None:
        checker, handler, source = check_video_length, process_video, video
    else:
        return "Няма файла для апрацоўкі.", None

    error = checker(source)
    if error:
        return error, None
    return handler(source)
122
-
123
def on_audio_change(audio):
    """React to a change of the audio input.

    When an audio file is present, clear and disable the video input and
    report the result of the duration check; otherwise re-enable the video
    input and clear the message.
    """
    if audio is None:
        return gr.update(interactive=True), ""
    error_msg = check_audio_length(audio)
    return gr.update(value=None, interactive=False), error_msg
130
-
131
def on_video_change(video):
    """React to a change of the video input.

    When a video file is present, clear and disable the audio input and
    report the result of the duration check; otherwise re-enable the audio
    input and clear the message.
    """
    if video is None:
        return gr.update(interactive=True), ""
    error_msg = check_video_length(video)
    return gr.update(value=None, interactive=False), error_msg
138
-
139
def translate_transcript(transcript, target_language):
    """Translate subtitle texts into *target_language* with the ``MODEL_NAME`` model.

    Returns:
        ``(translated_text, srt_path)`` on success.  When there is nothing to
        translate, the model returns no text, or the request fails, the result
        is ``(message, None)`` and no file is written, so a failure message is
        never offered for download as if it were subtitles.
    """
    if not transcript or not transcript.strip():
        return "Няма тэксту для перакладу.", None
    try:
        # The newline keeps the instruction and the "Тэкст:" label from
        # running together in the prompt.
        prompt_text = (
            f"перакладзі толькі тэксты субцітраў на {target_language} мову. Астатняя пакінь як ёсць.\n"
            f"Тэкст:\n{transcript}"
        )
        model = genai.GenerativeModel(MODEL_NAME)
        response = model.generate_content(prompt_text)
        if not response.text:
            return "Не атрымалася перакласці тэкст. Магчыма, памылка з API.", None
        translated = response.text.strip()
        translated_srt_filename = "translated_subtitles.srt"
        with open(translated_srt_filename, "w", encoding="utf-8") as f:
            f.write(translated)
        return translated, translated_srt_filename
    except Exception as e:
        return f"Памылка пры перакладзе: {str(e)}", None
157
-
158
# NOTE(review): the scraped source lost its indentation; the nesting below is
# reconstructed from the statement order and should be checked against the
# real file.
with gr.Blocks() as demo:
    # Add the Google Analytics snippet through an HTML component
    gr.HTML("""
    <!-- Google tag (gtag.js) -->
    <script async src="https://www.googletagmanager.com/gtag/js?id=G-2QZ4X58TG6"></script>
    <script>
    window.dataLayer = window.dataLayer || [];
    function gtag(){dataLayer.push(arguments);}
    gtag('js', new Date());
    gtag('config', 'G-2QZ4X58TG6');
    </script>
    """)

    gr.Markdown("# Транскрыпцыя кароткіх аўдыя для беларускай мовы")
    gr.Markdown(
        """
        ## Загрузіце аўдыёфайл або відэафайл да 10 хвілін. Субцітры з кароткімі тэкстамі будуць згенераваны разам з файлам субцітраў.
        [Ёсць пытанні ці прапановы? Далучайцеся да беларускаймоўнай суполкі штучнага інтэлекту](https://t.me/belarusai)
        **Хочаце каб сэрвіс працаваў? Налівайце каву! :** [Buy me a coffee](https://buymeacoffee.com/tuteishygpt)
        **Агучце беларускую мову тут :** [Беларуская мадэль маўлення](https://huggingface.co/spaces/archivartaunik/Bextts)

        """
    )
    with gr.Row():
        audio_input = gr.Audio(type="filepath", label="Аўдыёфайл")
        video_input = gr.Video(label="Відэафайл")
    # The transcription box is shared: it also displays validation errors
    transcript_output = gr.Textbox(label="Транскрыпцыя", lines=10)
    # When an audio or video file is loaded, run the duration check and
    # update the transcription box accordingly
    audio_input.change(fn=on_audio_change, inputs=audio_input, outputs=[video_input, transcript_output])
    video_input.change(fn=on_video_change, inputs=video_input, outputs=[audio_input, transcript_output])

    btn = gr.Button("Апрацаваць")
    file_output = gr.File(label="SRT-файл")
    btn.click(fn=process_file, inputs=[audio_input, video_input], outputs=[transcript_output, file_output])

    gr.Markdown("## Пераклад субцітраў")
    with gr.Row():
        language_dropdown = gr.Dropdown(
            choices=["English", "Беларуская", "Руcкая", "Польская", "Літоўская", "Нямецкая"],
            label="Выберы мову перакладу", value="English"
        )
        translate_btn = gr.Button("Пераклад")
    translation_output = gr.Textbox(label="Пераклад", lines=10)
    translation_file_output = gr.File(label="Translated SRT-файл")
    # The translation reads whatever is currently in the transcription box
    translate_btn.click(
        fn=translate_transcript,
        inputs=[transcript_output, language_dropdown],
        outputs=[translation_output, translation_file_output]
    )

demo.launch()
 
 
1
+ # gemini_srt_generator_improved.py
2
+ """A Gradio interface that generates SRT subtitles from audio or video files
3
+ using the new `google.genai` SDK (Gemini models).
4
+
5
+ Key improvements over the previous version
6
+ -----------------------------------------
7
+ 1. Switched from **google.generativeai** ➜ **google.genai** (new SDK).
8
+ 2. File validation (size / MIME‑type) **before** uploading to Gemini → cheaper &
9
+ safer.
10
+ 3. Robust *retry* wrapper with exponential back‑off + global request timeout.
11
+ 4. Automatic timestamped history folder (`transcripts/`) so results are never
12
+ overwritten.
13
+ 5. Cleaner UI: • progress bar (gr.Progress) • automatic scroll to bottom in the
14
+ live status textbox.
15
+ 6. Configurable model via UI dropdown.
16
+ 7. Minor refactor & type hints.
17
+ """
18
+ from __future__ import annotations
19
+
20
+ import json
21
+ import mimetypes
22
  import os
23
+ import threading
24
+ import time
25
+ import uuid
26
+ from datetime import datetime
27
+ from functools import wraps
28
+ from pathlib import Path
29
+ from typing import Callable, List, Tuple
30
+
31
  import gradio as gr
32
+ import google.genai as genai # NEW SDK ✅
 
33
  from pydub import AudioSegment
34
 
35
+ # -----------------------
36
+ # CONSTANTS & CONFIG
37
+ # -----------------------
38
MAX_FILE_SIZE_MB = 200  # Hard limit to prevent huge uploads (enforced in _validate_file)
ALLOWED_AUDIO_PREFIXES = ("audio/",)  # MIME-type prefixes accepted for audio input
ALLOWED_VIDEO_PREFIXES = ("video/",)  # MIME-type prefixes accepted for video input
HISTORY_DIR = Path("transcripts")  # raw model responses and generated SRT files go here
HISTORY_DIR.mkdir(exist_ok=True)  # import-time side effect: creates the folder if missing

DEFAULT_MODEL = "gemini-2.5-flash-preview-04-17"  # preselected in the model dropdown
# Offered as the second dropdown choice; nothing in this file switches to it
# automatically.
FALLBACK_MODEL = "gemini-2.5-flash"

# Generation settings passed to every model created by _get_model().
GENERATION_BASE_CONFIG = {
    "temperature": 0.35,
    "top_p": 0.95,
    "top_k": 64,
    # transcribe_audio() parses the reply with json.loads().
    "response_mime_type": "application/json",
}
53
+
54
+ # -----------------------
55
+ # Utils
56
+ # -----------------------
57
+
58
def retry(retries: int = 3, delay: float = 3.0, backoff: float = 2.0):
    """Return a decorator that retries the wrapped call with exponential back-off.

    Args:
        retries: Maximum number of attempts.  Values below 1 are treated as 1,
            so the wrapped function always runs at least once.
        delay: Seconds to sleep after the first failed attempt.
        backoff: Factor the sleep time is multiplied by after each failure.

    Returns:
        A decorator.  The decorated function returns the first successful
        result; once every attempt has failed, the last exception propagates
        with its original traceback.  Any ``Exception`` subclass triggers a
        retry.
    """
    attempts = max(1, retries)

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            pause = delay
            for attempt in range(1, attempts + 1):
                try:
                    return func(*args, **kwargs)
                except Exception:  # noqa: BLE001
                    if attempt == attempts:
                        raise  # out of attempts: surface the real error
                    time.sleep(pause)
                    pause *= backoff

        return wrapper

    return decorator
80
+
81
+
82
def seconds_to_timestamp(sec: float) -> str:
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to whole milliseconds *before* it is split into
    fields, so a fraction such as 1.9996 s carries into the seconds field
    instead of producing an invalid ``,1000``.  Negative offsets are clamped
    to zero.
    """
    total_ms = max(0, int(round(float(sec) * 1000)))
    total_seconds, ms = divmod(total_ms, 1000)
    total_minutes, s = divmod(total_seconds, 60)
    h, m = divmod(total_minutes, 60)
    return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"
88
+
89
+
90
+ # -----------------------
91
+ # Validation helpers
92
+ # -----------------------
93
+
94
+ def _validate_file(path: str, allowed_prefixes: tuple[str, ...]) -> None:
95
+ if not path or not os.path.isfile(path):
96
+ raise ValueError("Файл не знойдзены.")
97
+
98
+ size_mb = os.path.getsize(path) / 1024 / 1024
99
+ if size_mb > MAX_FILE_SIZE_MB:
100
+ raise ValueError(
101
+ f"Файл занадта вялікі: {size_mb:.1f} MB > {MAX_FILE_SIZE_MB} MB."
102
  )
103
+
104
+ mime, _ = mimetypes.guess_type(path)
105
+ if not mime or not mime.startswith(allowed_prefixes):
106
+ raise ValueError(f"Непадтрыманы тып файла: {mime or 'невядомы'}.")
107
+
108
+
109
+ # -----------------------
110
+ # Gemini helpers
111
+ # -----------------------
112
+
113
# Shared `google.genai` client, created by _configure_genai().  Module-wide
# state: the most recently configured API key is used for all requests.
_client = None


def _configure_genai(api_key: str) -> None:
    """Create the module-wide ``google.genai`` client for *api_key*.

    The ``google.genai`` SDK has no module-level ``configure``; requests go
    through a ``genai.Client`` instance instead.

    Raises:
        ValueError: if *api_key* is empty.
    """
    global _client
    if not api_key:
        raise ValueError("Не знойдзены API‑ключ для Gemini (env var `GEMINI_API_KEY`).")
    # The SDK expresses the HTTP timeout in milliseconds: 90 s per request.
    _client = genai.Client(api_key=api_key, http_options={"timeout": 90_000})


def _require_client():
    """Return the configured client or fail with a clear message."""
    if _client is None:
        raise RuntimeError("Gemini client is not configured; call _configure_genai() first.")
    return _client


class _ChatModel:
    """Small adapter exposing ``start_chat`` on top of ``client.chats.create``.

    It keeps the ``model.start_chat(history=[])`` call shape that
    ``_transcribe`` relies on.
    """

    def __init__(self, client, name: str) -> None:
        self._client = client
        self._name = name

    def start_chat(self, history=None):
        """Open a chat session for this model with the shared generation config."""
        return self._client.chats.create(
            model=self._name,
            config=GENERATION_BASE_CONFIG,
            history=history or [],
        )


def _get_model(name: str):
    """Return a model handle for *name* bound to the configured client."""
    return _ChatModel(_require_client(), name)


@retry(retries=3)
def _upload_to_gemini(path: str, status_callback: Callable[[str], None]):
    """Upload *path* through the Files API and return the uploaded file object.

    Progress is reported through *status_callback*.  The call is retried by
    the ``retry`` decorator on failure.
    """
    mime_type, _ = mimetypes.guess_type(path)
    status_callback("📤 Загружаем файл у Gemini …")
    upload_config = {"mime_type": mime_type} if mime_type else None
    file_obj = _require_client().files.upload(file=path, config=upload_config)
    status_callback("✅ Файл загружаны.")
    return file_obj


@retry(retries=3)
def _transcribe(file_obj, model, status_callback: Callable[[str], None]):
    """Send the uploaded file to *model* in a fresh chat and return the response.

    NOTE(review): only the file is sent, with no instruction text.  The rest
    of the pipeline expects a JSON list of segments with ``start``, ``end``
    and ``text`` keys, so a prompt or system instruction is presumably
    needed. Confirm the intended prompt.
    """
    status_callback("🔍 Пачынаем транскрыпцыю …")
    chat = model.start_chat(history=[])
    return chat.send_message(file_obj)
137
+
138
+
139
+ # -----------------------
140
+ # Core processing
141
+ # -----------------------
142
+
143
def transcribe_audio(audio_path: str, model_name: str, status_callback: Callable[[str], None]):
    """Upload *audio_path*, transcribe it with *model_name* and return parsed JSON.

    While the model call is running, a background thread reports an animated
    "in progress" message through *status_callback*.  The raw response text
    is also stored under ``HISTORY_DIR``.

    Raises:
        ValueError: from ``_validate_file`` for unacceptable input, or from
            ``json.loads`` if the response is not valid JSON.
        RuntimeError: if the model returns an empty response.
    """
    _validate_file(audio_path, ALLOWED_AUDIO_PREFIXES)
    file_obj = _upload_to_gemini(audio_path, status_callback)

    stop_event = threading.Event()

    def _progress():
        frames = ("⏳", "⏳.", "⏳..", "⏳...")
        index = 0
        while not stop_event.is_set():
            status_callback(f"Транскрыпцыя ідзе {frames[index % len(frames)]}")
            index += 1
            # wait() returns as soon as the event is set, so finishing the
            # request is not delayed by a pending sleep.
            stop_event.wait(0.6)

    thread = threading.Thread(target=_progress, daemon=True)
    thread.start()

    try:
        model = _get_model(model_name)
        response = _transcribe(file_obj, model, status_callback)
    finally:
        stop_event.set()
        thread.join()

    if not response.text:
        raise RuntimeError("❌ Пусты адказ ад мадэлі.")

    # The random suffix keeps requests that finish within the same second
    # from overwriting each other's history file.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    raw_json_path = HISTORY_DIR / f"response_{timestamp}_{uuid.uuid4().hex[:8]}.json"
    raw_json_path.write_text(response.text, encoding="utf-8")

    status_callback("📥 Апрацоўка транскрыпцыі …")
    return json.loads(response.text)
177
+
178
+
179
def transcripts_to_srt(transcripts: List[dict]) -> Tuple[str, Path]:
    """Render transcript segments as SRT text and save them under ``HISTORY_DIR``.

    Args:
        transcripts: Segments with ``start`` and ``end`` (passed to
            ``seconds_to_timestamp``) and ``text`` keys.

    Returns:
        ``(srt_text, path_of_written_file)``.

    Raises:
        KeyError: if a segment lacks one of the required keys.
    """
    entries = [
        f"{idx}\n"
        f"{seconds_to_timestamp(seg['start'])} --> {seconds_to_timestamp(seg['end'])}\n"
        f"{seg['text']}\n"
        for idx, seg in enumerate(transcripts, start=1)
    ]
    # Each entry ends with a newline, so joining with "\n" leaves the blank
    # line between cues.
    content = "\n".join(entries)

    # The folder may have been removed since import time.
    HISTORY_DIR.mkdir(exist_ok=True)
    # The random suffix keeps concurrent requests from overwriting (and then
    # serving) each other's subtitle file.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = HISTORY_DIR / f"subtitles_{timestamp}_{uuid.uuid4().hex[:8]}.srt"
    filename.write_text(content, encoding="utf-8")
    return content, filename
191
+
192
+
193
def extract_audio_from_video(video_file: str, status_callback: Callable[[str], None]) -> str:
    """Validate *video_file* and export its audio to a uniquely named MP3.

    The MP3 is written to the current working directory; its path is
    returned.  Progress is reported through *status_callback*.
    """
    _validate_file(video_file, ALLOWED_VIDEO_PREFIXES)
    status_callback("🎞 Вылучаем аўдыё з відэа …")
    track = AudioSegment.from_file(video_file)
    mp3_path = f"extracted_{uuid.uuid4().hex}.mp3"
    track.export(mp3_path, format="mp3")
    status_callback("✅ Аўдыё вылучана.")
    return mp3_path
201
+
202
+
203
def process_audio(audio_path: str, model_name: str, status_callback):
    """Transcribe *audio_path* and convert the segments to SRT.

    Returns the ``(srt_text, srt_path)`` pair produced by
    ``transcripts_to_srt``.
    """
    segments = transcribe_audio(audio_path, model_name, status_callback)
    status_callback("📝 Канвертацыя ў SRT …")
    srt_text, srt_path = transcripts_to_srt(segments)
    return srt_text, srt_path
207
+
208
+
209
def process_video(video_path: str, model_name: str, status_callback):
    """Extract the audio track of *video_path* and process it like an audio file.

    The intermediate MP3 written by ``extract_audio_from_video`` is removed
    afterwards, whether or not processing succeeds, so extracted files do not
    accumulate in the working directory.
    """
    audio_path = extract_audio_from_video(video_path, status_callback)
    try:
        return process_audio(audio_path, model_name, status_callback)
    finally:
        try:
            os.remove(audio_path)
        except OSError:
            # Best effort: a leftover temp file must not mask the real result.
            pass
212
+
213
+
214
def process_file(audio: str | None, video: str | None, model_name: str, progress: gr.Progress, status_callback):
    """Process whichever input was supplied, preferring *audio* over *video*.

    Args:
        audio: Path of an uploaded audio file, or ``None``.
        video: Path of an uploaded video file, or ``None``.
        model_name: Gemini model to use.
        progress: Gradio progress tracker.  Kept for interface compatibility
            but not used here; it is not a context manager, so it must not be
            entered with ``with``.
        status_callback: Called with human-readable status messages.

    Returns:
        The ``(srt_text, srt_path)`` pair from the processing pipeline.

    Raises:
        ValueError: if neither *audio* nor *video* is given.
    """
    status_callback("🔄 Пачатак апрацоўкі …")
    if audio:
        result = process_audio(audio, model_name, status_callback)
    elif video:
        result = process_video(video, model_name, status_callback)
    else:
        raise ValueError("Ні адзін файл не загружаны.")
    status_callback("✅ Гатова!")
    return result
 
226
 
227
+
228
+ # -----------------------
229
+ # Gradio UI
230
+ # -----------------------
231
+
232
def build_ui():
    """Build and return the Gradio ``Blocks`` application.

    The layout has an optional API-key box, a model dropdown, audio and video
    inputs, and outputs for the SRT text, the SRT file and a live status line.
    """
    # Used server-side only, as a fallback when the user leaves the key box
    # empty.
    api_key_default = os.getenv("GEMINI_API_KEY", "")

    with gr.Blocks(title="Gemini SRT Generator (Belarusian Edition)") as demo:
        gr.Markdown(
            """
            ## Загрузіце аўдыё- ці відэафайл — атрымайце субцітры SRT
            [Суполка беларускага ШІ](https://t.me/belarusai)
            [Buy Me A Coffee](https://buymeacoffee.com/tuteishygpt)
            """
        )

        with gr.Row():
            # Never pre-fill the server's key: component values are sent to
            # every visitor's browser.
            api_key_box = gr.Textbox(
                label="🔑 Gemini API‑key (калі не ўсталяваны як env)",
                type="password",
                value="",
            )
        with gr.Row():
            model_dropdown = gr.Dropdown(
                [DEFAULT_MODEL, FALLBACK_MODEL],
                value=DEFAULT_MODEL,
                label="🧠 Мадэль Gemini",
            )
        with gr.Row():
            audio_input = gr.Audio(type="filepath", label="🎙 Аўдыёфайл")
            video_input = gr.Video(label="🎥 Відэафайл")
        btn = gr.Button("🚀 Апрацаваць")
        with gr.Row():
            transcript_output = gr.Textbox(
                label="📄 SRT-транскрыпцыя", lines=12, autoscroll=True
            )
            file_output = gr.File(label="⬇️ SRT-файл")
        status_output = gr.Textbox(label="🛠️ Статус", interactive=False, autoscroll=True)

        def wrapped_process(audio, video, api_key, model_name, progress=gr.Progress()):
            """Run the pipeline in a worker thread and stream status updates.

            Components can only be updated through handler outputs, so this
            is a generator: it yields the latest status text while the worker
            runs and the final result at the end.
            """
            try:
                _configure_genai(api_key or api_key_default)
            except ValueError as exc:
                raise gr.Error(str(exc)) from exc

            latest = {"text": ""}
            outcome = {}

            def update_status(text):
                # Called from worker threads; only records the newest message.
                latest["text"] = text

            def worker():
                try:
                    outcome["result"] = process_file(
                        audio, video, model_name, progress, update_status
                    )
                except Exception as exc:  # noqa: BLE001 - reported to the user below
                    outcome["error"] = exc

            thread = threading.Thread(target=worker, daemon=True)
            thread.start()

            shown = None
            while thread.is_alive():
                thread.join(0.5)
                if latest["text"] != shown:
                    shown = latest["text"]
                    # gr.update() with no arguments leaves a component as is.
                    yield gr.update(), gr.update(), shown

            if "error" in outcome:
                raise gr.Error(str(outcome["error"])) from outcome["error"]

            content, file_path = outcome["result"]
            yield content, str(file_path), latest["text"]

        btn.click(
            fn=wrapped_process,
            inputs=[audio_input, video_input, api_key_box, model_dropdown],
            outputs=[transcript_output, file_output, status_output],
        )

    return demo
285
+
286
+
287
def main():
    """Build the Gradio application and start serving it."""
    build_ui().launch()


if __name__ == "__main__":
    main()