demomodels committed on
Commit
7d5800b
·
1 Parent(s): 3f6d243

Initial commit

Browse files
Files changed (1) hide show
  1. app.py +4 -16
app.py CHANGED
@@ -1,6 +1,8 @@
1
  import gradio as gr
2
  import json
3
  import torch
 
 
4
  from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
5
 
6
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -28,25 +30,11 @@ pipe = pipeline(
28
  device=device,
29
  )
30
 
31
-
32
- def process_audio(audio_file):
33
- # In this example, let's just return a hardcoded array of JSON objects
34
- output_data = [
35
- {"label": "cat", "confidence": 0.8},
36
- {"label": "dog", "confidence": 0.7},
37
- {"label": "bird", "confidence": 0.6}
38
- ]
39
- return json.dumps(output_data)
40
-
41
-
42
  def process(audio):
43
- # Read audio data from the file
44
- # with open(audio.name, 'rb') as f:
45
- # audio_data = f.read()
46
  audio_data, audio_filename = audio
 
47
 
48
-
49
- # Process the audio data
50
  result = pipe(audio_data)['chunks']
51
  for item in result:
52
  item['timestamp'] = list(item['timestamp'])
 
1
  import gradio as gr
2
  import json
3
  import torch
4
+ import numpy as np
5
+
6
  from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
7
 
8
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
30
  device=device,
31
  )
32
 
 
 
 
 
 
 
 
 
 
 
 
33
  def process(audio):
 
 
 
34
  audio_data, audio_filename = audio
35
+ audio_data = np.frombuffer(audio_data.read(), dtype=np.int16)
36
 
37
+ print(audio_data)
 
38
  result = pipe(audio_data)['chunks']
39
  for item in result:
40
  item['timestamp'] = list(item['timestamp'])