Spaces:
Running
Running
File size: 4,759 Bytes
6160629 a9913d9 d9d75ab 06555da 6160629 c3bc5d7 e64464f a9913d9 265d4cc 6160629 d9d75ab 265d4cc a9913d9 265d4cc 6160629 3ab57cd a9913d9 a4cbcc8 a9913d9 265d4cc 6160629 a9913d9 265d4cc a9913d9 265d4cc 631160f a9913d9 631160f 265d4cc 3ab57cd a9913d9 265d4cc d9d75ab f2c750c d9d75ab f2c750c d9d75ab a9913d9 265d4cc a9913d9 631160f a9913d9 6160629 265d4cc 9dab23b 06555da 6c7418c 265d4cc 9dab23b a9913d9 265d4cc 9dab23b 265d4cc 9dab23b a9913d9 265d4cc a9913d9 9dab23b 265d4cc a9913d9 265d4cc a9913d9 265d4cc f2c750c a9913d9 f2c750c a9913d9 265d4cc f7a8b9d a9913d9 9dab23b 2376307 265d4cc 5e7fffb f7a8b9d a9913d9 265d4cc a9913d9 265d4cc 3ab57cd 631160f f2c750c a9913d9 265d4cc f2c750c a9913d9 f2c750c a9913d9 f2c750c 265d4cc f2c750c d9d75ab 7d946d9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
import os
import gradio as gr
from google import genai
import mimetypes
from pydub import AudioSegment
# Read the API key, the two model names and the transcription prompt from
# environment variables. Each value is None when its variable is unset;
# nothing below validates that.
GEMINI_API_KEY = os.environ.get("gemini")
MODEL_NAME_TH = os.environ.get("modTH")
MODEL_NAME = os.environ.get("mod")
PROMPT_TRANSCRIBE = os.environ.get("p")

# Create one shared session and the two model handles used by the functions
# below: `model_th` for transcription, `model_general` for timestamp cleanup
# and translation.
# NOTE(review): confirm that the installed `google.genai` package actually
# exposes `Session` and `GenerativeModel(session=...)` — TODO verify against
# the SDK documentation.
session = genai.Session(api_key=GEMINI_API_KEY)
model_th = genai.GenerativeModel(model_name=MODEL_NAME_TH, session=session)
model_general = genai.GenerativeModel(model_name=MODEL_NAME, session=session)
def transcribe_audio(audio_file: str) -> str:
    """Transcribe an audio file with the transcription model.

    The MIME type is guessed from the file name; anything that is not
    ``audio/*`` is rejected without contacting the model.

    Args:
        audio_file: Path to the audio file on disk.

    Returns:
        The stripped response text on success. Every failure, including a
        non-audio file, yields a message starting with ``"Памылка"``, which
        is the prefix ``process_audio`` tests to detect an error.
    """
    try:
        mime_type, _ = mimetypes.guess_type(audio_file)
        if not mime_type or not mime_type.startswith("audio"):
            # Carry the error prefix so the caller does not mistake this
            # message for a transcript and write it into an SRT file.
            return "Памылка: файл не аўдыёфармату."
        with open(audio_file, "rb") as f:
            audio_data = f.read()
        response = model_th.generate_content(
            [PROMPT_TRANSCRIBE, {"mime_type": mime_type, "data": audio_data}]
        )
        return response.text.strip()
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"Памылка: {e}"
def fix_subtitles_format(transcript: str) -> str:
    """Ask the general model to normalise subtitle timestamps.

    The prompt instructs the model to leave the subtitle text alone and only
    rewrite timestamps into the ``00:00:01,589`` form.

    This is a best-effort step: if the model call fails, or the reply is
    empty after stripping, the original transcript is returned unchanged so
    the caller never loses the transcription.

    Args:
        transcript: Subtitle text produced by the transcription step.

    Returns:
        The stripped model reply, or ``transcript`` itself as a fallback.
    """
    prompt_fix = (
        "Не змяняй тэксты, толькі выправі фармат часу ў субцітрах на 00:00:01,589:\n"
        f"{transcript}"
    )
    try:
        response_fix = model_general.generate_content(prompt_fix)
        fixed = response_fix.text.strip()
    except Exception:
        # Deliberate fallback: an unformatted transcript beats no transcript.
        return transcript
    # An empty reply would otherwise wipe the transcript.
    return fixed or transcript
def create_srt(transcript: str, filename: str = "subtitles.srt") -> tuple[str, str]:
    """Write subtitle text to a UTF-8 encoded file.

    Args:
        transcript: Text to store in the file.
        filename: Destination path; an existing file is overwritten.

    Returns:
        ``(transcript, filename)`` when the write succeeds, otherwise
        ``(error message, "")``.
    """
    try:
        with open(filename, mode="w", encoding="utf-8") as srt_file:
            srt_file.write(transcript)
    except Exception as err:
        return f"Памылка запісу SRT: {err}", ""
    else:
        return transcript, filename
def process_audio(audio_path: str) -> tuple[str, str]:
    """Transcribe an audio file and save the result as an SRT file.

    Args:
        audio_path: Path to the audio file.

    Returns:
        ``(error message, "")`` when the transcription text starts with
        ``"Памылка"``; otherwise whatever ``create_srt`` returns for the
        timestamp-normalised transcript.
    """
    raw_text = transcribe_audio(audio_path)
    if not raw_text.startswith("Памылка"):
        return create_srt(fix_subtitles_format(raw_text))
    return raw_text, ""
def extract_audio_from_video(video_file: str) -> tuple[str, str]:
    """Load a media file with pydub and re-export it as ``extracted_audio.mp3``.

    Args:
        video_file: Path to the source video file.

    Returns:
        ``(audio_path, "")`` on success, or ``("", error message)`` when
        loading or exporting fails.
    """
    target_path = "extracted_audio.mp3"
    try:
        AudioSegment.from_file(video_file).export(target_path, format="mp3")
    except Exception as err:
        return "", f"Памылка аўдыё з відэа: {err}"
    return target_path, ""
def process_video(video_path: str) -> tuple[str, str]:
    """Extract the audio from a video, then run the audio pipeline on it.

    Args:
        video_path: Path to the video file.

    Returns:
        ``(error message, "")`` when audio extraction reports an error;
        otherwise the result of ``process_audio`` on the extracted file.
    """
    extracted_path, failure = extract_audio_from_video(video_path)
    return (failure, "") if failure else process_audio(extracted_path)
def process_file(audio_path: str | None, video_path: str | None) -> tuple[str, str]:
if audio_path:
return process_audio(audio_path)
elif video_path:
return process_video(video_path)
return "Няма файла для апрацоўкі.", ""
def translate_transcript(transcript: str, target_language: str) -> tuple[str, str]:
    """Translate subtitle text with the general model and save it as an SRT file.

    Args:
        transcript: Subtitle text to translate.
        target_language: Language name interpolated into the prompt.

    Returns:
        The result of ``create_srt`` for the translated text (written to
        ``translated_subtitles.srt``), or ``(message, "")`` when there is
        nothing to translate or the model call fails.
    """
    # Nothing to translate (e.g. the button was pressed before any
    # transcription): skip the model call instead of sending an empty prompt.
    if not transcript or not transcript.strip():
        return "Няма тэксту для перакладу.", ""
    prompt_text = (
        f"Перакладзі тэксты субцітраў на {target_language} мову, астатняе не змяняй:\n{transcript}"
    )
    try:
        response = model_general.generate_content(prompt_text)
        translated = response.text.strip()
        return create_srt(translated, "translated_subtitles.srt")
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"Памылка перакладу: {e}", ""
# Gradio UI: the transcription section on top, subtitle translation below.
with gr.Blocks() as demo:
    gr.Markdown("# Транскрыпцыя аўдыя (беларуская мова)")
    with gr.Row():
        audio_input = gr.Audio(type="filepath", label="Аўдыёфайл")
        video_input = gr.Video(label="Відэафайл")
    btn = gr.Button("Апрацаваць")
    transcript_output = gr.Textbox(label="Транскрыпцыя", lines=10)
    file_output = gr.File(label="SRT-файл")
    # process_file uses the audio input when present, otherwise the video input.
    btn.click(
        fn=process_file,
        inputs=[audio_input, video_input],
        outputs=[transcript_output, file_output],
    )

    gr.Markdown("## Пераклад субцітраў")
    with gr.Row():
        # The selected label is interpolated verbatim into the translation
        # prompt, so each label must be spelled in a single script (no Latin
        # look-alike letters inside Cyrillic words).
        language_dropdown = gr.Dropdown(
            ["English", "Руская", "Польская", "Літоўская", "Нямецкая"],
            label="Мова перакладу",
            value="English",
        )
        translate_btn = gr.Button("Перакласці")
    translation_output = gr.Textbox(label="Пераклад", lines=10)
    translation_file_output = gr.File(label="Translated SRT-файл")
    # The translation reads whatever text is currently in the transcript box.
    translate_btn.click(
        fn=translate_transcript,
        inputs=[transcript_output, language_dropdown],
        outputs=[translation_output, translation_file_output],
    )

demo.launch()
|