darag committed on
Commit
5876c56
·
verified ·
1 Parent(s): 1254faf

Create app.py

Browse files

Kurdish (Kurmanji) speech to text and SRT

Files changed (1) hide show
  1. app.py +92 -0
app.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """gradio_kurdi.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1DFSu80KU5dHKmbbqb2bPA5R8hzPbTP76
8
+ """
9
+
10
# NOTE: the Colab cell `!pip install torch transformers datasets librosa gradio`
# is IPython shell syntax and raises SyntaxError in a plain Python file such as
# app.py. Declare these packages in requirements.txt instead.
11
+
12
+ import torch
13
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
14
+ import librosa
15
+ import numpy as np
16
+ from datetime import timedelta
17
+ import gradio as gr
18
+ import os
19
+
20
def format_time(seconds):
    """Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    Args:
        seconds: Duration in seconds (int or float). Negative values are
            clamped to zero because SRT cannot express negative timestamps.

    Returns:
        The timestamp string. Hours are not wrapped at 24, so durations of
        a day or more keep counting (e.g. 90000 s -> ``25:00:00,000``).
        Milliseconds are truncated, not rounded.
    """
    td = timedelta(seconds=max(seconds, 0))
    # Integer division by a 1 ms timedelta yields whole milliseconds for the
    # full duration, including the ``days`` part that ``td.seconds`` drops.
    total_ms = td // timedelta(milliseconds=1)
    total_seconds, milliseconds = divmod(total_ms, 1000)
    hours, remainder = divmod(total_seconds, 3600)
    minutes, secs = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"
26
+
27
def estimate_word_timings(transcription, total_duration):
    """Estimate start/end times for each word of a transcription.

    The total duration is divided across words in proportion to their
    character count; words are laid out back to back starting at time zero.
    This is a heuristic, not a forced alignment.

    Args:
        transcription: Transcribed text; words are separated by whitespace.
        total_duration: Length of the audio in seconds.

    Returns:
        A list of ``(word, start_time, end_time)`` tuples in spoken order.
        Empty when the transcription contains no words.
    """
    words = transcription.split()
    if not words:
        # Nothing was transcribed (e.g. silent audio); avoid dividing by zero.
        return []

    total_chars = sum(len(word) for word in words)
    char_duration = total_duration / total_chars

    word_timings = []
    current_time = 0.0
    for word in words:
        end_time = current_time + len(word) * char_duration
        word_timings.append((word, current_time, end_time))
        current_time = end_time

    return word_timings
43
+
44
# Hugging Face checkpoint used for Kurmanji Kurdish speech recognition.
# The processor and the CTC model are both loaded once, at import time,
# from the same checkpoint so every request reuses them.
model_name = "Akashpb13/xlsr_kurmanji_kurdish"
processor = Wav2Vec2Processor.from_pretrained(model_name)
model = Wav2Vec2ForCTC.from_pretrained(model_name)
47
+
48
def transcribe_audio(file):
    """Transcribe an audio file and build a word-by-word SRT subtitle file.

    Args:
        file: Path to the audio file, or ``None`` when nothing was submitted.

    Returns:
        A ``(transcription, srt_path)`` tuple. ``srt_path`` is the path of a
        newly created ``.srt`` file; when no audio was provided the result
        is ``("", None)``.
    """
    import tempfile

    if file is None:
        # Submit was pressed without any audio; there is nothing to process.
        return "", None

    # Load the audio resampled to 16 kHz.
    speech, rate = librosa.load(file, sr=16000)

    # Prepare the model input.
    input_values = processor(
        speech, return_tensors="pt", sampling_rate=rate
    ).input_values

    # Run the model without tracking gradients (inference only).
    with torch.no_grad():
        logits = model(input_values).logits

    # Decode the most likely token at every frame into text.
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)[0]

    # Total length of the recording in seconds.
    total_duration = len(speech) / rate

    # Estimate timings per word; skip when nothing was recognised so an
    # empty transcription cannot trigger a division by zero downstream.
    if transcription.strip():
        word_timings = estimate_word_timings(transcription, total_duration)
    else:
        word_timings = []

    # Build the SRT content: one numbered cue per word.
    srt_content = "".join(
        f"{i}\n{format_time(start_time)} --> {format_time(end_time)}\n{word}\n\n"
        for i, (word, start_time, end_time) in enumerate(word_timings, start=1)
    )

    # Write to a unique temporary file so concurrent requests do not
    # overwrite each other's subtitles.
    with tempfile.NamedTemporaryFile(
        "w",
        suffix=".srt",
        prefix="output_word_by_word_",
        delete=False,
        encoding="utf-8",
    ) as f:
        f.write(srt_content)
        output_filename = f.name

    return transcription, output_filename
82
+
83
# Gradio front end: takes an audio file path, shows the transcription and
# offers the generated SRT file for download.
interface = gr.Interface(
    transcribe_audio,
    gr.Audio(type="filepath"),
    [
        gr.Textbox(label="Transcription"),
        gr.File(label="Download SRT File"),
    ],
    title="Deng --- Nivîsandin ::: Kurdî-Kurmancî",
    description="Dengê xwe ji me re rêke û li Submit bixe ... û bila bêhna te fireh be .",
    article="By Derax Elî",
)

# Start the web server for the app.
interface.launch()