hackergeek98 committed on
Commit
d10f84e
·
verified ·
1 Parent(s): 92375a2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -4
app.py CHANGED
@@ -1,4 +1,6 @@
1
  import torch
 
 
2
  from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
3
  from pydub import AudioSegment
4
  import os
@@ -11,8 +13,14 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
11
  model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id).to(device)
12
  processor = AutoProcessor.from_pretrained(model_id)
13
 
14
- # Create pipeline with correct parameter
15
- pipe = pipeline("automatic-speech-recognition", model=model, tokenizer=processor.tokenizer, feature_extractor=processor.feature_extractor, device=0 if torch.cuda.is_available() else -1)
 
 
 
 
 
 
16
 
17
  # Convert audio to WAV format
18
  def convert_to_wav(audio_path):
@@ -34,6 +42,13 @@ def split_audio(audio_path, chunk_length_ms=30000): # Default: 30 sec per chunk
34
 
35
  return chunk_paths
36
 
 
 
 
 
 
 
 
37
  # Transcribe a long audio file
38
  def transcribe_long_audio(audio_path):
39
  wav_path = convert_to_wav(audio_path)
@@ -41,8 +56,7 @@ def transcribe_long_audio(audio_path):
41
  transcription = ""
42
 
43
  for chunk in chunk_paths:
44
- result = pipe({"path": chunk}) # FIXED: Pass chunk as dict
45
- transcription += result["text"] + "\n"
46
  os.remove(chunk) # Remove processed chunk
47
 
48
  os.remove(wav_path) # Cleanup original file
 
1
  import torch
2
+ import torchaudio
3
+ import numpy as np
4
  from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
5
  from pydub import AudioSegment
6
  import os
 
13
  model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id).to(device)
14
  processor = AutoProcessor.from_pretrained(model_id)
15
 
16
# Build the speech-recognition pipeline around the model and processor
# loaded above; the device index is 0 when CUDA is available, else -1.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=model,
    feature_extractor=processor.feature_extractor,
    tokenizer=processor.tokenizer,
    device=-1 if not torch.cuda.is_available() else 0,
)
24
 
25
  # Convert audio to WAV format
26
  def convert_to_wav(audio_path):
 
42
 
43
  return chunk_paths
44
 
45
+ # **🔹 Fixed: Read Audio Before Passing to Model**
46
def transcribe_audio_chunk(chunk_path):
    """Transcribe one audio chunk file with the module-level ASR pipeline.

    Args:
        chunk_path: Path of an audio file readable by ``torchaudio.load``.

    Returns:
        The ``"text"`` entry of the pipeline result for this chunk.
    """
    # torchaudio.load yields a (channels, frames) tensor and the sample rate.
    waveform, sampling_rate = torchaudio.load(chunk_path)
    # The ASR pipeline only accepts a 1-D single-channel array, so average
    # the channels down to mono instead of passing the 2-D array through.
    mono = waveform.mean(dim=0).numpy()
    result = pipe({"raw": mono, "sampling_rate": sampling_rate})
    return result["text"]
51
+
52
  # Transcribe a long audio file
53
  def transcribe_long_audio(audio_path):
54
  wav_path = convert_to_wav(audio_path)
 
56
  transcription = ""
57
 
58
  for chunk in chunk_paths:
59
+ transcription += transcribe_audio_chunk(chunk) + "\n"
 
60
  os.remove(chunk) # Remove processed chunk
61
 
62
  os.remove(wav_path) # Cleanup original file