Files changed (1)
  1. app.py +22 -17
app.py CHANGED
@@ -3,39 +3,44 @@ from transformers import WhisperProcessor, WhisperForConditionalGeneration
 import torch
 import librosa
 
-# Load the fine-tuned Whisper model and processor
+# Load the model and processor
 model_name = "hackergeek98/tinyyyy_whisper"
 processor = WhisperProcessor.from_pretrained(model_name)
 model = WhisperForConditionalGeneration.from_pretrained(model_name)
 
-# Move model to GPU if available
+# Move the model to the GPU if available
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)
 
-# Define the ASR function
+# Set the forced decoder IDs for the Persian language
+forced_decoder_ids = processor.get_decoder_prompt_ids(language="fa", task="transcribe")
+
 def transcribe_audio(audio_file):
-    # Load audio file using librosa (supports multiple formats)
-    audio_data, sampling_rate = librosa.load(audio_file, sr=16000)  # Resample to 16kHz
+    # Load the audio file and resample it to 16 kHz
+    audio_data, sampling_rate = librosa.load(audio_file, sr=16000)
 
-    # Preprocess the audio
-    inputs = processor(audio_data, sampling_rate=sampling_rate, return_tensors="pt").input_features.to(device)
+    # Preprocessing
+    inputs = processor(audio_data, sampling_rate=sampling_rate, return_tensors="pt").input_features.to(device)
 
-    # Generate transcription
+    # Generate the transcription, forced to Persian
     with torch.no_grad():
-        predicted_ids = model.generate(inputs)
+        predicted_ids = model.generate(
+            inputs,
+            forced_decoder_ids=forced_decoder_ids
+        )
 
-    # Decode the transcription
+    # Decode the output
     transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
     return transcription
 
-# Create the Gradio interface
+# Create the Gradio interface
 interface = gr.Interface(
-    fn=transcribe_audio,  # Function to call
-    inputs=gr.Audio(type="filepath"),  # Input: Upload audio file (any format)
-    outputs=gr.Textbox(label="Transcription"),  # Output: Display transcription
-    title="Whisper ASR: Tinyyyy Model",
-    description="Upload an audio file (e.g., .wav, .mp3, .ogg), and the fine-tuned Whisper model will transcribe it.",
+    fn=transcribe_audio,
+    inputs=gr.Audio(type="filepath"),
+    outputs=gr.Textbox(label="متن فارسی"),
+    title="تبدیل گفتار به متن فارسی",
+    description="فایل صوتی فارسی آپلود کنید (فرمت‌های wav, mp3, ...)"
 )
 
-# Launch the app
+# Run the app
 interface.launch()
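
For a quick way to exercise the new Persian-forced decoding path without launching the Gradio UI, a standalone sketch along the following lines should work; it mirrors the code in the diff, and the file name sample_fa.wav is only a hypothetical placeholder for any local Persian recording.

# Standalone sanity check for the forced-Persian decoding path (not part of app.py).
# "sample_fa.wav" is a hypothetical placeholder for any local Persian audio clip.
import librosa
import torch
from transformers import WhisperForConditionalGeneration, WhisperProcessor

model_name = "hackergeek98/tinyyyy_whisper"
processor = WhisperProcessor.from_pretrained(model_name)
model = WhisperForConditionalGeneration.from_pretrained(model_name)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Same prompt IDs the app builds: the "fa" language token plus the transcribe task token
forced_decoder_ids = processor.get_decoder_prompt_ids(language="fa", task="transcribe")
print(forced_decoder_ids)  # list of (position, token_id) pairs prepended at generation time

audio, sr = librosa.load("sample_fa.wav", sr=16000)  # Whisper expects 16 kHz input
features = processor(audio, sampling_rate=sr, return_tensors="pt").input_features.to(device)

with torch.no_grad():
    predicted_ids = model.generate(features, forced_decoder_ids=forced_decoder_ids)

print(processor.batch_decode(predicted_ids, skip_special_tokens=True)[0])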