datasciencesage committed
Commit f49ec35 · 1 Parent(s): d3b0d68

Added the Files

Files changed (3)
  1. Dockerfile +16 -0
  2. main.py +195 -0
  3. requirements.txt +13 -0
Dockerfile ADDED
@@ -0,0 +1,16 @@
+ # Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+ # you will also find guides on how best to write your Dockerfile
+
+ FROM python:3.9
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV PATH="/home/user/.local/bin:$PATH"
+
+ WORKDIR /app
+
+ COPY --chown=user ./requirements.txt requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+ COPY --chown=user . /app
+ CMD ["gunicorn", "-b", "0.0.0.0:7860", "main:app"]
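The image follows the Hugging Face Spaces Docker template: it runs as a non-root user, installs the Python dependencies from requirements.txt, and serves main:app with gunicorn on port 7860, the port Spaces expects. A minimal smoke-test sketch against a locally built container; the host, the published port, and the use of the requests library are assumptions for illustration, not part of this commit:

import requests

# Assumes the image was built and started with port 7860 published,
# e.g. docker run -p 7860:7860 <image>; host and port are illustrative.
resp = requests.post("http://localhost:7860/process", json={"command": "classify"})
print(resp.status_code, resp.json())  # expect a prompt asking for an .mp3 upload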
main.py ADDED
@@ -0,0 +1,195 @@
+ import os
+ os.environ["KERAS_BACKEND"] = "jax"
+ os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
+ import logging
+ import traceback
+ from pathlib import Path
+ import numpy as np
+ import librosa
+ import tensorflow_hub as hub
+ from flask import Flask, render_template, request, jsonify, session
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+ import keras
+ import torch
+ from werkzeug.utils import secure_filename
+
+ # Configure logging
+ logging.basicConfig(
+     level=logging.INFO,
+     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+     handlers=[
+         logging.FileHandler('app.log'),
+         logging.StreamHandler()
+     ]
+ )
+ logger = logging.getLogger(__name__)
+
+
+ class AudioProcessor:
+     """Singleton that loads all models once and is reused by every request."""
+     _instance = None
+     _initialized = False
+
+     def __new__(cls):
+         if cls._instance is None:
+             cls._instance = super(AudioProcessor, cls).__new__(cls)
+         return cls._instance
+
+     def __init__(self):
+         if not AudioProcessor._initialized:
+             self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
+             self.torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+             self.initialize_models()
+             AudioProcessor._initialized = True
+
+     def initialize_models(self):
+         try:
+             logger.info("Initializing models...")
+             # Initialize transcription model
+             model_id = "distil-whisper/distil-large-v3"
+             self.transcription_model = AutoModelForSpeechSeq2Seq.from_pretrained(
+                 model_id, torch_dtype=self.torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
+             )
+             self.transcription_model.to(self.device)
+             self.processor = AutoProcessor.from_pretrained(model_id)
+
+             # Initialize classification model
+             self.classification_model = keras.saving.load_model("hf://datasciencesage/attentionaudioclassification")
+
+             # Initialize transcription pipeline
+             self.pipe = pipeline(
+                 "automatic-speech-recognition",
+                 model=self.transcription_model,
+                 tokenizer=self.processor.tokenizer,
+                 feature_extractor=self.processor.feature_extractor,
+                 max_new_tokens=128,
+                 chunk_length_s=25,
+                 batch_size=16,
+                 torch_dtype=self.torch_dtype,
+                 device=self.device,
+             )
+
+             # Initialize YAMNet model for audio embeddings
+             self.yamnet_model = hub.load('https://tfhub.dev/google/yamnet/1')
+
+             logger.info("Models initialized successfully")
+         except Exception as e:
+             logger.error(f"Error initializing models: {str(e)}")
+             raise
+
+     def load_wav_16k_mono(self, filename):
+         """Load an audio file as mono and resample it to 16 kHz."""
+         try:
+             wav, sr = librosa.load(filename, mono=True, sr=None)
+             if sr != 16000:
+                 wav = librosa.resample(wav, orig_sr=sr, target_sr=16000)
+             return wav
+         except Exception as e:
+             logger.error(f"Error loading audio file: {str(e)}")
+             raise
+
+     def get_features_yamnet_extract_embedding(self, wav_data):
+         """Return the mean YAMNet embedding (1024 values) over all audio frames."""
+         try:
+             scores, embeddings, spectrogram = self.yamnet_model(wav_data)
+             return np.mean(embeddings.numpy(), axis=0)
+         except Exception as e:
+             logger.error(f"Error extracting YAMNet embeddings: {str(e)}")
+             raise
+
+
+ # Initialize Flask application
+ app = Flask(__name__)
+ app.secret_key = os.environ.get('SECRET_KEY', 'your_secret_key_here')  # prefer a real secret from the environment
+ app.config['UPLOAD_FOLDER'] = Path('uploads')
+ app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024  # 16 MB upload limit
+
+ # Create upload folder
+ app.config['UPLOAD_FOLDER'].mkdir(exist_ok=True)
+
+ # Initialize audio processor (models load only once)
+ audio_processor = AudioProcessor()
+
+
+ @app.route('/')
+ def index():
+     session.clear()
+     return render_template('terminal.html')
+
+
+ @app.route('/process', methods=['POST'])
+ def process():
+     try:
+         data = request.json
+         command = data.get('command', '').strip().lower()
+
+         if command in ['classify', 'transcribe']:
+             session['operation'] = command
+             return jsonify({
+                 'result': f'root@math:~$ Upload a .mp3 file for {command} operation.',
+                 'upload': True
+             })
+         else:
+             return jsonify({
+                 'result': 'root@math:~$ Please specify an operation: "classify" or "transcribe".'
+             })
+     except Exception as e:
+         logger.error(f"Error in process route: {str(e)}\n{traceback.format_exc()}")
+         session.pop('operation', None)
+         return jsonify({'result': f'root@math:~$ Error: {str(e)}'})
+
+
+ @app.route('/upload', methods=['POST'])
+ def upload():
+     filepath = None
+     try:
+         operation = session.get('operation')
+         if not operation:
+             return jsonify({
+                 'result': 'root@math:~$ Please specify an operation first: "classify" or "transcribe".'
+             })
+
+         if 'file' not in request.files:
+             return jsonify({'result': 'root@math:~$ No file uploaded.'})
+
+         file = request.files['file']
+         if file.filename == '' or not file.filename.lower().endswith('.mp3'):
+             return jsonify({'result': 'root@math:~$ Please upload a valid .mp3 file.'})
+
+         filename = secure_filename(file.filename)
+         filepath = app.config['UPLOAD_FOLDER'] / filename
+
+         file.save(filepath)
+         wav_data = audio_processor.load_wav_16k_mono(filepath)
+
+         if operation == 'classify':
+             embeddings = audio_processor.get_features_yamnet_extract_embedding(wav_data)
+             embeddings = np.reshape(embeddings, (-1, 1024))
+             result = np.argmax(audio_processor.classification_model.predict(embeddings))
+         elif operation == 'transcribe':
+             result = audio_processor.pipe(str(filepath))['text']
+         else:
+             result = 'Invalid operation'
+
+         return jsonify({
+             'result': f'root@math:~$ Result is: {result}\nroot@math:~$ Please specify an operation: "classify" or "transcribe".',
+             'upload': False
+         })
+
+     except Exception as e:
+         logger.error(f"Error in upload route: {str(e)}\n{traceback.format_exc()}")
+         return jsonify({
+             'result': f'root@math:~$ Error: {str(e)}\nroot@math:~$ Please specify an operation: "classify" or "transcribe".'
+         })
+     finally:
+         session.pop('operation', None)
+         if filepath and Path(filepath).exists():
+             try:
+                 Path(filepath).unlink()
+             except Exception as e:
+                 logger.error(f"Error deleting file {filepath}: {str(e)}")
+
+
+ if __name__ == '__main__':
+     # debug=False prevents the reloader from starting a second process and loading the models twice
+     app.run(debug=False, host='0.0.0.0', port=5000)
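The two routes are coupled through the Flask session: /process stores the requested operation in the signed session cookie, and /upload only acts if that cookie is present, so a client has to reuse the same session across both calls. A minimal client sketch under assumptions that are not part of this commit (local dev server on port 5000, a hypothetical sample.mp3 on disk, and the requests library):

import requests

with requests.Session() as s:  # reuse the session cookie set by /process
    # 1. Pick an operation; the chosen operation is kept in the signed session cookie.
    r = s.post("http://localhost:5000/process", json={"command": "transcribe"})
    print(r.json()["result"])

    # 2. Upload the .mp3; the server transcribes (or classifies) it and then deletes the file.
    with open("sample.mp3", "rb") as f:
        r = s.post("http://localhost:5000/upload", files={"file": ("sample.mp3", f, "audio/mpeg")})
    print(r.json()["result"])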
requirements.txt ADDED
@@ -0,0 +1,13 @@
+ flask
+ gunicorn
+ librosa
+ transformers
+ numpy
+ keras==3.7.0
+ jax
+ torch
+ torchvision
+ torchaudio
+ tensorflow
+ tensorflow_hub
+ huggingface_hub