Spaces:

darag
/

kurdish-kurmanci-to-text-srt

Sleeping

App Files Files Community

darag committed on Aug 27, 2024

Commit

5876c56

verified ·

1 Parent(s): 1254faf

Create app.py

Browse files

Kurdish Kurmanci speech to text and SRT

Files changed (1) hide show

app.py +92 -0

app.py ADDED Viewed

	@@ -0,0 +1,92 @@

+# -*- coding: utf-8 -*-
+"""gradio_kurdi.ipynb
+Automatically generated by Colab.
+Original file is located at
+    https://colab.research.google.com/drive/1DFSu80KU5dHKmbbqb2bPA5R8hzPbTP76
+"""
# NOTE: "!pip install ..." is IPython/Colab shell syntax and raises a
# SyntaxError in a plain .py file. Declare these packages in requirements.txt
# (or install them before running this script) instead:
#   torch transformers datasets librosa gradio
+import torch
+from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
+import librosa
+import numpy as np
+from datetime import timedelta
+import gradio as gr
+import os
def format_time(seconds):
    """Format an offset in seconds as an SRT timestamp (``HH:MM:SS,mmm``).

    Args:
        seconds: Offset from the start of the media, in seconds (int or
            float). Negative values are clamped to zero because SRT has no
            representation for negative timestamps.

    Returns:
        The timestamp string, e.g. ``"01:01:01,500"``. The hour field is not
        wrapped at 24, so offsets of a day or more keep counting upwards
        (``"25:00:00,000"``).
    """
    # Work in whole milliseconds. Dividing by a timedelta keeps the day
    # component, which reading `td.seconds` alone would silently drop.
    total_ms = max(timedelta(seconds=seconds) // timedelta(milliseconds=1), 0)
    total_secs, millis = divmod(total_ms, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, mins = divmod(total_mins, 60)
    return f"{hours:02d}:{mins:02d}:{secs:02d},{millis:03d}"
def estimate_word_timings(transcription, total_duration):
    """Estimate start/end times for each word of a transcription.

    The total duration is distributed over the words in proportion to their
    character counts; whitespace between words is given no time. This is a
    heuristic, not an alignment against the audio.

    Args:
        transcription: The transcribed text; words are split on whitespace.
        total_duration: Length of the audio in seconds.

    Returns:
        A list of ``(word, start_time, end_time)`` tuples in spoken order,
        with each word starting where the previous one ended. An empty or
        whitespace-only transcription yields an empty list.
    """
    words = transcription.split()
    total_chars = sum(len(word) for word in words)
    # No words (e.g. silent audio) would otherwise divide by zero below.
    if total_chars == 0:
        return []

    char_duration = total_duration / total_chars
    word_timings = []
    current_time = 0
    for word in words:
        end_time = current_time + len(word) * char_duration
        word_timings.append((word, current_time, end_time))
        current_time = end_time
    return word_timings
# Hugging Face Hub checkpoint id (a Kurmanji Kurdish XLSR model, per its name).
model_name = "Akashpb13/xlsr_kurmanji_kurdish"
# Both objects are loaded once at module import and shared by every request.
processor = Wav2Vec2Processor.from_pretrained(model_name)
model = Wav2Vec2ForCTC.from_pretrained(model_name)
def transcribe_audio(file):
    """Transcribe an audio file and write a word-by-word SRT subtitle file.

    Args:
        file: Path to the audio file to transcribe. ``None`` or an empty
            path (nothing was submitted) is accepted.

    Returns:
        A ``(transcription, srt_path)`` tuple: the decoded text and the path
        of the generated ``.srt`` file, which has one cue per word with
        timings estimated by ``estimate_word_timings``. When no file is
        given the result is ``("", None)``.
    """
    import tempfile  # only needed here; avoids touching the file-level imports

    # Nothing was submitted: return empty outputs instead of crashing.
    if not file:
        return "", None

    # Load the audio, resampled to the 16 kHz rate requested via `sr`.
    speech, rate = librosa.load(file, sr=16000)
    # Prepare the model input tensor.
    input_values = processor(speech, return_tensors="pt", sampling_rate=rate).input_values
    # Run the model without tracking gradients (inference only).
    with torch.no_grad():
        logits = model(input_values).logits
    # Take the most likely token at each step and decode the ids to text.
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)[0]
    # Total length of the recording in seconds.
    total_duration = len(speech) / rate
    # Estimate per-word timings; an empty transcription has no words to time.
    if transcription.strip():
        word_timings = estimate_word_timings(transcription, total_duration)
    else:
        word_timings = []
    # Build the SRT content: numbered cues separated by blank lines.
    srt_content = "".join(
        f"{i}\n{format_time(start_time)} --> {format_time(end_time)}\n{word}\n\n"
        for i, (word, start_time, end_time) in enumerate(word_timings, start=1)
    )
    # Write into a fresh temporary directory so concurrent requests do not
    # overwrite each other's output, while keeping the download file name.
    # NOTE: the directory is not removed here; cleanup is left to the host.
    output_filename = os.path.join(tempfile.mkdtemp(), "output_word_by_word.srt")
    with open(output_filename, "w", encoding="utf-8") as f:
        f.write(srt_content)
    return transcription, output_filename
# Input component: configured with type="filepath", matching the path that
# transcribe_audio hands to its audio loader.
audio_input = gr.Audio(type="filepath")
# Output components, in the order of transcribe_audio's return tuple.
result_outputs = [
    gr.Textbox(label="Transcription"),
    gr.File(label="Download SRT File"),
]

# Web UI wiring the audio input to the transcription callback.
interface = gr.Interface(
    fn=transcribe_audio,
    inputs=audio_input,
    outputs=result_outputs,
    title="Deng --- Nivîsandin ::: Kurdî-Kurmancî",
    description="Dengê xwe ji me re rêke û li Submit bixe ... û bila bêhna te fireh be .",
    article="By Derax Elî",
)

# Launch the app as soon as the module is executed.
interface.launch()