demomodels committed on
Commit
1378b33
·
1 Parent(s): 6e8e7a7

Initial commit

Browse files
Files changed (2) hide show
  1. app.py +45 -4
  2. requirements.txt +88 -0
app.py CHANGED
@@ -1,7 +1,48 @@
1
  import gradio as gr
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- iface = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import json
3
+ import torch
4
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
5
 
6
# Pick the compute target in one place: the first CUDA device with half
# precision when a GPU is available, otherwise the CPU with full precision.
if torch.cuda.is_available():
    device, torch_dtype = "cuda:0", torch.float16
else:
    device, torch_dtype = "cpu", torch.float32

model_id = "openai/whisper-large-v3"

# Instantiate the speech model from `model_id` and move it to `device`.
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
    use_safetensors=True,
)
model.to(device)

# The processor supplies the tokenizer and feature extractor passed below.
processor = AutoProcessor.from_pretrained(model_id)

# Speech-recognition pipeline configured for 30-second chunks, batches of 16
# and timestamped output.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    chunk_length_s=30,
    batch_size=16,
    return_timestamps=True,
    torch_dtype=torch_dtype,
    device=device,
)
30
+
31
+
32
def process_audio(audio_file):
    """Return a fixed JSON string of label/confidence records.

    This is a placeholder handler: ``audio_file`` is accepted but never
    read, and the same three records are returned on every call.

    Args:
        audio_file: Ignored.

    Returns:
        A JSON-encoded array of objects, each with a ``"label"`` and a
        ``"confidence"`` key.
    """
    # Hard-coded sample results, kept in their original order.
    sample_scores = (("cat", 0.8), ("dog", 0.7), ("bird", 0.6))
    records = [
        {"label": name, "confidence": score}
        for name, score in sample_scores
    ]
    return json.dumps(records)
40
def process(audio):
    """Transcribe *audio* with the module-level ``pipe`` and return its chunks.

    Args:
        audio: Input forwarded unchanged to ``pipe``. Presumably a file path
            or another form the speech-recognition pipeline accepts --
            TODO confirm against what the Gradio "audio" input delivers.
            ``None`` falls back to the previously hard-coded ``'audio.mp3'``.

    Returns:
        A list with one dict per entry of the pipeline result's ``'chunks'``,
        where each ``'timestamp'`` value has been converted to a list.
    """
    # The argument used to be ignored in favour of a fixed file name; honour
    # it now and keep the old path only as the fallback for a missing input.
    source = 'audio.mp3' if audio is None else audio
    chunks = pipe(source)['chunks']
    # Build fresh dicts instead of mutating the pipeline's output in place.
    return [
        {**chunk, 'timestamp': list(chunk['timestamp'])}
        for chunk in chunks
    ]
45
+
46
+
47
# Build the web UI (audio input, text output) around the handler and serve it.
# NOTE(review): the UI is wired to the placeholder `process_audio`, not to the
# Whisper-backed `process` -- confirm this is intended.
iface = gr.Interface(
    fn=process_audio,
    inputs="audio",
    outputs="text",
)
iface.launch()
requirements.txt ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate==0.27.2
2
+ aiofiles==23.2.1
3
+ altair==5.2.0
4
+ annotated-types==0.6.0
5
+ anyio==4.2.0
6
+ attrs==23.2.0
7
+ certifi==2024.2.2
8
+ charset-normalizer==3.3.2
9
+ click==8.1.7
10
+ colorama==0.4.6
11
+ contourpy==1.2.0
12
+ cycler==0.12.1
13
+ fastapi==0.109.2
14
+ ffmpy==0.3.2
15
+ filelock==3.13.1
16
+ fonttools==4.49.0
17
+ fsspec==2024.2.0
18
+ gradio==4.19.1
19
+ gradio_client==0.10.0
20
+ h11==0.14.0
21
+ httpcore==1.0.3
22
+ httpx==0.26.0
23
+ huggingface-hub==0.20.3
24
+ idna==3.6
25
+ importlib-resources==6.1.1
26
+ Jinja2==3.1.3
27
+ jsonschema==4.21.1
28
+ jsonschema-specifications==2023.12.1
29
+ kiwisolver==1.4.5
30
+ markdown-it-py==3.0.0
31
+ MarkupSafe==2.1.5
32
+ matplotlib==3.8.3
33
+ mdurl==0.1.2
34
+ mpmath==1.3.0
35
+ networkx==3.2.1
36
+ numpy==1.26.4
37
+ nvidia-cublas-cu12==12.1.3.1
38
+ nvidia-cuda-cupti-cu12==12.1.105
39
+ nvidia-cuda-nvrtc-cu12==12.1.105
40
+ nvidia-cuda-runtime-cu12==12.1.105
41
+ nvidia-cudnn-cu12==8.9.2.26
42
+ nvidia-cufft-cu12==11.0.2.54
43
+ nvidia-curand-cu12==10.3.2.106
44
+ nvidia-cusolver-cu12==11.4.5.107
45
+ nvidia-cusparse-cu12==12.1.0.106
46
+ nvidia-nccl-cu12==2.19.3
47
+ nvidia-nvjitlink-cu12==12.3.101
48
+ nvidia-nvtx-cu12==12.1.105
49
+ orjson==3.9.14
50
+ packaging==23.2
51
+ pandas==2.2.0
52
+ pillow==10.2.0
53
+ psutil==5.9.8
54
+ pydantic==2.6.1
55
+ pydantic_core==2.16.2
56
+ pydub==0.25.1
57
+ Pygments==2.17.2
58
+ pyparsing==3.1.1
59
+ python-dateutil==2.8.2
60
+ python-multipart==0.0.9
61
+ pytz==2024.1
62
+ PyYAML==6.0.1
63
+ referencing==0.33.0
64
+ regex==2023.12.25
65
+ requests==2.31.0
66
+ rich==13.7.0
67
+ rpds-py==0.18.0
68
+ ruff==0.2.1
69
+ safetensors==0.4.2
70
+ semantic-version==2.10.0
71
+ shellingham==1.5.4
72
+ six==1.16.0
73
+ sniffio==1.3.0
74
+ starlette==0.36.3
75
+ sympy==1.12
76
+ tokenizers==0.15.2
77
+ tomlkit==0.12.0
78
+ toolz==0.12.1
79
+ torch==2.2.0
80
+ tqdm==4.66.2
81
+ transformers==4.37.2
82
+ triton==2.2.0
83
+ typer==0.9.0
84
+ typing_extensions==4.9.0
85
+ tzdata==2024.1
86
+ urllib3==2.2.0
87
+ uvicorn==0.27.1
88
+ websockets==11.0.3