archit11 committed on
Commit 62dda31 · verified · 1 Parent(s): ee83532

Update app.py

Files changed (1): app.py +1 -6
app.py CHANGED
@@ -5,12 +5,11 @@ import numpy as np
 from typing import Dict, List, Tuple
 import spaces
 import librosa
-# Constants
+
 MODEL_NAME = 'sarvamai/shuka_v1'
 SAMPLE_RATE = 16000
 MAX_NEW_TOKENS = 256
 
-# Load the pipeline
 def load_pipeline():
     return transformers.pipeline(
         model=MODEL_NAME,
@@ -37,11 +36,9 @@ def transcribe_and_respond(audio_input: Tuple[int, np.ndarray]) -> str:
         if audio.dtype != np.float32:
             audio = audio.astype(np.float32)
 
-        # Resample if necessary
         if sample_rate != SAMPLE_RATE:
             audio = librosa.resample(audio, orig_sr=sample_rate, target_sr=SAMPLE_RATE)
 
-        # Create input for the pipeline
         turns = create_conversation_turns("<|audio|>")
         inputs = {
             'audio': audio,
@@ -49,14 +46,12 @@ def transcribe_and_respond(audio_input: Tuple[int, np.ndarray]) -> str:
             'sampling_rate': SAMPLE_RATE
         }
 
-        # Generate response
         response = pipe(inputs, max_new_tokens=MAX_NEW_TOKENS)
 
         return response
     except Exception as e:
         return f"Error processing audio: {str(e)}"
 
-# Create the Gradio interface
 iface = gr.Interface(
     fn=transcribe_and_respond,
     inputs=gr.Audio(sources="microphone", type="numpy"),
 
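For context, a minimal sketch of how app.py fits together after this commit. Only the hunks above come from the Space itself; the body of create_conversation_turns, the @spaces.GPU decorator, the extra transformers.pipeline() keyword arguments, and the Gradio outputs/launch lines are not visible in the diff, so the versions below are assumptions for illustration, not the Space's actual code.

# Sketch only: reconstructs the surrounding app.py from the hunks in this commit.
# Anything not visible in the diff is an assumption and is marked as such below.
import gradio as gr
import transformers
import numpy as np
from typing import Dict, List, Tuple
import spaces
import librosa

MODEL_NAME = 'sarvamai/shuka_v1'
SAMPLE_RATE = 16000
MAX_NEW_TOKENS = 256


def load_pipeline():
    # The diff only shows model=MODEL_NAME; the remaining kwargs are assumed.
    return transformers.pipeline(
        model=MODEL_NAME,
        trust_remote_code=True,
        device=0,
    )


pipe = load_pipeline()


def create_conversation_turns(prompt: str) -> List[Dict[str, str]]:
    # Assumed helper: the diff calls it but never shows its body.
    return [
        {'role': 'system', 'content': 'Respond naturally and informatively.'},
        {'role': 'user', 'content': prompt},
    ]


@spaces.GPU  # assumed from `import spaces` (ZeroGPU Spaces convention)
def transcribe_and_respond(audio_input: Tuple[int, np.ndarray]) -> str:
    try:
        sample_rate, audio = audio_input

        # gr.Audio(type="numpy") hands back raw samples; match the diff's cast.
        if audio.dtype != np.float32:
            audio = audio.astype(np.float32)

        if sample_rate != SAMPLE_RATE:
            audio = librosa.resample(audio, orig_sr=sample_rate, target_sr=SAMPLE_RATE)

        turns = create_conversation_turns("<|audio|>")
        inputs = {
            'audio': audio,
            'turns': turns,  # key name assumed; the diff shows 'audio' and 'sampling_rate'
            'sampling_rate': SAMPLE_RATE,
        }

        response = pipe(inputs, max_new_tokens=MAX_NEW_TOKENS)
        return response
    except Exception as e:
        return f"Error processing audio: {str(e)}"


iface = gr.Interface(
    fn=transcribe_and_respond,
    inputs=gr.Audio(sources="microphone", type="numpy"),
    outputs="text",  # assumed; not shown in the diff
)

iface.launch()  # assumed; the launch call is not shown in the diff

The commit itself only strips six inline comments and adds one blank line (+1 -6), so the runtime behaviour of the Space is unchanged.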