Spaces:

ahmedkasem
/

quran-nlp

Running

App Files Files Community

deveix committed on Apr 22, 2024

Commit

a1b9bc0

1 Parent(s): e7838b2

add cnn

Browse files

Files changed (2) hide show

app/main.py +122 -1
requirements.txt +2 -1

app/main.py CHANGED Viewed

@@ -22,6 +22,9 @@ import opensmile
 import ffmpeg
 import noisereduce as nr
 default_sample_rate=22050
@@ -201,6 +204,124 @@ async def get_answer(item: Item, token: str = Depends(verify_token)):
         # If there's an error, return a 500 error with the error's details
         raise HTTPException(status_code=500, detail=str(e))
 # random forest
 # Module-level load of two joblib-serialized artifacts from the app/ directory.
 # NOTE(review): presumably the trained random-forest classifier and the PCA
 # transform applied to its features -- confirm against the training code.
 model = joblib.load('app/1713661391.0946255_trained_model.joblib')
 pca = joblib.load('app/pca.pkl')
@@ -320,7 +441,7 @@ def repair_mp3_with_ffmpeg_python(input_path, output_path):
         print(f"Failed to repair file {input_path}: {str(e.stderr)}")
-@app.post("/mlp")
 async def handle_audio(file: UploadFile = File(...)):
     try:
         # Ensure that we are handling an MP3 file

 import ffmpeg
 import noisereduce as nr
+from tensorflow.keras.models import load_model
+from tensorflow.keras.utils import to_categorical
+from tensorflow.keras.models import Sequential
 default_sample_rate=22050
         # If there's an error, return a 500 error with the error's details
         raise HTTPException(status_code=500, detail=str(e))
+# ------- CNN
+# Constants used by the CNN preprocessing helpers below.
+TARGET_DURATION = 3  # length, in seconds, of each clip produced by preprocess_audio
+SAMPLE_RATE = 44100  # rate (Hz) passed to librosa.load; separate from default_sample_rate above
+N_MELS = 128  # number of Mel bands requested from librosa.feature.melspectrogram
+HOP_LENGTH = 512  # number of samples between successive spectrogram frames
+def preprocess_audio(file_path):
+    """Load an audio file and cut it into fixed-length clips.
+
+    The signal is loaded at SAMPLE_RATE, leading/trailing silence is trimmed
+    (top_db=25), non-stationary noise reduction is applied, and the result is
+    split into consecutive clips of TARGET_DURATION seconds. The last (or
+    only) clip is zero-padded so every returned clip has exactly
+    TARGET_DURATION * SAMPLE_RATE samples.
+
+    Args:
+        file_path: Path of the audio file to load.
+
+    Returns:
+        A list of 1-D NumPy arrays of equal length, an empty list when
+        nothing is left after trimming, or None when the file could not be
+        loaded.
+    """
+    try:
+        # Load the audio file, resampled to SAMPLE_RATE
+        audio, _ = librosa.load(file_path, sr=SAMPLE_RATE)
+    except FileNotFoundError:
+        print(f"Error: File '{file_path}' not found.")
+        return None
+    except Exception as e:
+        print(f"Error loading audio file: {e}")
+        return None
+    # Check if audio signal is None
+    if audio is None:
+        print(f"Error: Audio signal is None for file '{file_path}'.")
+        return None
+    audio, _ = librosa.effects.trim(audio, top_db=25)
+    if len(audio) == 0:
+        # Nothing but silence: there is no signal to denoise or split.
+        return []
+    audio = nr.reduce_noise(y=audio, sr=SAMPLE_RATE, thresh_n_mult_nonstationary=1, stationary=False)
+    # Clip size in samples. Lengths are measured on the trimmed signal, not
+    # on the file as loaded, so the padding below always fills a clip to the
+    # full size regardless of how much silence was removed.
+    clip_length = int(TARGET_DURATION * SAMPLE_RATE)
+    clips = []
+    for start in range(0, len(audio), clip_length):
+        clip = audio[start:start + clip_length]
+        missing = clip_length - len(clip)
+        if missing > 0:
+            # Final (or only) clip is short: right-pad with zeros.
+            clip = np.pad(clip, (0, missing), mode='constant')
+        clips.append(clip)
+    return clips
+def generate_spectrogram(audio):
+    """Return a normalized, dB-scaled Mel spectrogram of one audio clip.
+
+    The power spectrogram is computed with N_MELS bands and HOP_LENGTH hop at
+    SAMPLE_RATE, converted to decibels relative to its own maximum, and then
+    passed through librosa.util.normalize with its default arguments.
+
+    NOTE(review): with those defaults the dB values are <= 0 and are scaled
+    per frame by the largest absolute value, so the output appears to lie in
+    [-1, 0] rather than [0, 1] -- confirm against the training pipeline
+    before changing anything here.
+    """
+    mel_power = librosa.feature.melspectrogram(
+        y=audio,
+        sr=SAMPLE_RATE,
+        n_mels=N_MELS,
+        hop_length=HOP_LENGTH,
+    )
+    mel_db = librosa.power_to_db(mel_power, ref=np.max)
+    return librosa.util.normalize(mel_db)
+# Load the CNN and its label encoder once, at module import, so the /cnn
+# handler reuses the same objects on every request.
+cnn_model = load_model('app/cnn.h5')
+# NOTE(review): joblib.load unpickles the file -- only ship artifacts from a trusted source.
+cnn_label_encoder = joblib.load('app/cnn_label_encoder.pkl')
+@app.post("/cnn")
+async def handle_cnn(file: UploadFile = File(...)):
+    """Classify an uploaded MP3/WAV recording with the CNN model.
+
+    The upload is written to a temporary file under app/, split into clips by
+    preprocess_audio, converted to Mel spectrograms and fed to cnn_model. One
+    label is predicted per clip.
+
+    Args:
+        file: The uploaded audio file; its content type selects the extension.
+
+    Returns:
+        A dict with a status "message" and "sheikh", the list of decoded
+        labels (one per clip).
+
+    Raises:
+        HTTPException: 400 for an unsupported content type, an undecodable
+            file, or a file that yields no usable clip; 500 for any other
+            failure during processing.
+    """
+    # Only MP3 and WAV uploads are accepted. Validate before entering the
+    # try block so the 400 is not rewrapped as a 500.
+    if file.content_type in ("audio/mpeg", "audio/mp3"):
+        file_extension = ".mp3"
+    elif file.content_type == "audio/wav":
+        file_extension = ".wav"
+    else:
+        raise HTTPException(status_code=400, detail="Invalid file type. Supported types: MP3, WAV.")
+    temp_filename = f"app/{uuid4().hex}{file_extension}"
+    try:
+        # librosa needs a path, so persist the upload to a temporary file
+        contents = await file.read()
+        with open(temp_filename, "wb") as f:
+            f.write(contents)
+        clips = preprocess_audio(temp_filename)
+        if clips is None:
+            # preprocess_audio signals a load failure by returning None
+            raise HTTPException(status_code=400, detail="Could not decode the uploaded audio file.")
+        spectrograms = []
+        for clip in clips:
+            spectrogram = generate_spectrogram(clip)
+            if np.isnan(spectrogram).any() or np.isinf(spectrogram).any():
+                print("Invalid spectrogram detected")
+                continue
+            spectrograms.append(spectrogram)
+        if not spectrograms:
+            # An empty batch cannot be fed to the model
+            raise HTTPException(status_code=400, detail="No usable audio found in the uploaded file.")
+        # Add the trailing channel axis to the stacked spectrograms
+        X = np.array(spectrograms)[..., np.newaxis]
+        # Make predictions
+        predictions = cnn_model.predict(X)
+        # Convert predictions to label indexes
+        predicted_label_indexes = np.argmax(predictions, axis=1)
+        # Convert label indexes to actual label names
+        predicted_labels = cnn_label_encoder.inverse_transform(predicted_label_indexes)
+        print('decoded', predicted_labels)
+        # Return plain Python values: a NumPy array is not JSON-serializable
+        return {
+            "message": "File processed successfully",
+            "sheikh": np.asarray(predicted_labels).tolist(),
+        }
+    except HTTPException:
+        # Keep deliberate 4xx responses intact instead of turning them into 500s
+        raise
+    except Exception as e:
+        print(e)
+        # Handle possible exceptions
+        raise HTTPException(status_code=500, detail=str(e))
+    finally:
+        # Clean up the temporary file on success and on failure alike
+        if os.path.exists(temp_filename):
+            os.remove(temp_filename)
 # random forest
 # Module-level load of two joblib-serialized artifacts from the app/ directory.
 # NOTE(review): presumably the trained random-forest classifier and the PCA
 # transform applied to its features -- confirm against the training code.
 model = joblib.load('app/1713661391.0946255_trained_model.joblib')
 pca = joblib.load('app/pca.pkl')
         print(f"Failed to repair file {input_path}: {str(e.stderr)}")
+@app.post("/rf")
 async def handle_audio(file: UploadFile = File(...)):
     try:
         # Ensure that we are handling an MP3 file

requirements.txt CHANGED Viewed

@@ -19,4 +19,5 @@ matplotlib
 python-multipart
 ffmpeg-python
 noisereduce
-scikit-learn==1.2.2

 python-multipart
 ffmpeg-python
 noisereduce
+scikit-learn==1.2.2
+tensorflow