hackergeek98 committed on
Commit
59eca54
·
verified ·
1 Parent(s): 51f2ece

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -7
app.py CHANGED
@@ -1,6 +1,3 @@
1
- # Install required packages
2
- #!pip install torch torchaudio transformers pydub gradio
3
-
4
  import torch
5
  from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
6
  from pydub import AudioSegment
@@ -14,9 +11,14 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
14
  model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id).to(device)
15
  processor = AutoProcessor.from_pretrained(model_id)
16
 
17
- # Create pipeline
18
  whisper_pipe = pipeline(
19
- "automatic-speech-recognition", model=model, tokenizer=processor.tokenizer, feature_extractor=processor.feature_extractor, device=0 if torch.cuda.is_available() else -1
 
 
 
 
 
20
  )
21
 
22
  # Convert audio to WAV format
@@ -46,7 +48,7 @@ def transcribe_long_audio(audio_path):
46
  transcription = ""
47
 
48
  for chunk in chunk_paths:
49
- result = whisper_pipe(chunk)
50
  transcription += result["text"] + "\n"
51
  os.remove(chunk) # Remove processed chunk
52
 
@@ -67,4 +69,4 @@ iface = gr.Interface(
67
  )
68
 
69
  if __name__ == "__main__":
70
- iface.launch()
 
 
 
 
1
  import torch
2
  from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
3
  from pydub import AudioSegment
 
11
  model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id).to(device)
12
  processor = AutoProcessor.from_pretrained(model_id)
13
 
14
+ # Create pipeline with correct parameter
15
  whisper_pipe = pipeline(
16
+ "automatic-speech-recognition",
17
+ model=model,
18
+ tokenizer=processor.tokenizer,
19
+ feature_extractor=processor.feature_extractor,
20
+ device=0 if torch.cuda.is_available() else -1,
21
+ generate_kwargs={"input_features": None}, # Ensure correct input handling
22
  )
23
 
24
  # Convert audio to WAV format
 
48
  transcription = ""
49
 
50
  for chunk in chunk_paths:
51
+ result = whisper_pipe(chunk) # No longer uses deprecated `inputs`
52
  transcription += result["text"] + "\n"
53
  os.remove(chunk) # Remove processed chunk
54
 
 
69
  )
70
 
71
  if __name__ == "__main__":
72
+ iface.launch()