Spaces:

vitorcalvi
/

aig2

Sleeping

App Files Files Community

vitorcalvi committed on Nov 13, 2024

Commit

5630c13

1 Parent(s): 339e2ea

1

Browse files

Files changed (3) hide show

Dockerfile +6 -9
app.py +40 -32
requirements.txt +1 -9

Dockerfile CHANGED Viewed

@@ -1,21 +1,18 @@
 FROM python:3.9
-# Install system dependencies
-RUN apt-get update && apt-get install -y ffmpeg libsndfile1
-# Set the working directory
 WORKDIR /app
-# Copy requirements and install dependencies
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
-# Copy the application code
 COPY . .
-# Expose the required port
 EXPOSE 7860
-# Run the app
 CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

+# Use the official Python image.
 FROM python:3.9
+# Set the working directory.
 WORKDIR /app
+# Copy requirements file and install dependencies.
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
+# Copy the application code.
 COPY . .
+# Expose port 7860 for the FastAPI app.
 EXPOSE 7860
+# Run the app using Uvicorn.
 CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

app.py CHANGED Viewed

@@ -1,37 +1,49 @@
 from fastapi import FastAPI, File, UploadFile, HTTPException, Form
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel
-import librosa
 import numpy as np
 import tempfile
 import os
 import warnings
-import soundfile as sf
-from pydub import AudioSegment
-warnings.filterwarnings("ignore", category=UserWarning, module='librosa')
 app = FastAPI()
-def convert_mp3_to_wav(mp3_path, wav_path):
-    # Convert mp3 to wav using pydub and ffmpeg
-    audio = AudioSegment.from_mp3(mp3_path)
-    audio.export(wav_path, format="wav")
 def extract_audio_features(audio_file_path):
-    # Load the audio file and extract features
-    y, sr = sf.read(audio_file_path)
-    f0, voiced_flag, voiced_probs = librosa.pyin(y, fmin=75, fmax=600)
-    f0 = f0[~np.isnan(f0)]
-    energy = librosa.feature.rms(y=y)[0]
-    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
-    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
-    tempo, _ = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
-    speech_rate = tempo / 60
-    return f0, energy, speech_rate, mfccs, y, sr
 def analyze_voice_stress(audio_file_path):
-    f0, energy, speech_rate, mfccs, y, sr = extract_audio_features(audio_file_path)
     mean_f0 = np.mean(f0)
     std_f0 = np.std(f0)
     mean_energy = np.mean(energy)
@@ -79,20 +91,17 @@ async def analyze_stress(
     # Handle audio file analysis
     if file or file_path:
         if file:
-            if not file.filename.endswith(".mp3"):
-                raise HTTPException(status_code=400, detail="Only .mp3 files are supported.")
-            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
                 temp_file.write(await file.read())
-                temp_mp3_path = temp_file.name
-                temp_wav_path = temp_mp3_path.replace(".mp3", ".wav")
-                convert_mp3_to_wav(temp_mp3_path, temp_wav_path)
         else:
-            if not file_path.endswith(".mp3"):
-                raise HTTPException(status_code=400, detail="Only .mp3 files are supported.")
             if not os.path.exists(file_path):
                 raise HTTPException(status_code=400, detail="File path does not exist.")
-            temp_wav_path = file_path.replace(".mp3", ".wav")
-            convert_mp3_to_wav(file_path, temp_wav_path)
         try:
             result = analyze_voice_stress(temp_wav_path)
@@ -102,7 +111,6 @@ async def analyze_stress(
         finally:
             # Clean up temporary files
             if file:
-                os.remove(temp_mp3_path)
                 os.remove(temp_wav_path)
     # Handle text analysis
@@ -112,5 +120,5 @@ async def analyze_stress(
 if __name__ == "__main__":
     import uvicorn
-    port = int(os.getenv("PORT", 7860))  # Use the PORT environment variable for Render compatibility
     uvicorn.run("app:app", host="0.0.0.0", port=port, reload=True)

 from fastapi import FastAPI, File, UploadFile, HTTPException, Form
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel
+import torchaudio
 import numpy as np
 import tempfile
 import os
 import warnings
+warnings.filterwarnings("ignore")
 app = FastAPI()
 def extract_audio_features(audio_file_path):
+    # Load the audio file using torchaudio
+    waveform, sample_rate = torchaudio.load(audio_file_path)
+    # Ensure waveform is mono by averaging channels if necessary
+    if waveform.shape[0] > 1:
+        waveform = waveform.mean(dim=0, keepdim=True)
+    waveform = waveform.squeeze()  # Remove channel dimension if it's 1
+    # Extract pitch (fundamental frequency)
+    pitch_frequencies, voiced_flags, _ = torchaudio.functional.detect_pitch_frequency(
+        waveform, sample_rate, frame_time=0.01, win_length=1024
+    )
+    f0 = pitch_frequencies[voiced_flags > 0]
+    # Extract energy
+    energy = waveform.pow(2).numpy()
+    # Extract MFCCs
+    mfcc_transform = torchaudio.transforms.MFCC(sample_rate=sample_rate, n_mfcc=13)
+    mfccs = mfcc_transform(waveform.unsqueeze(0)).squeeze(0).numpy()
+    # Estimate speech rate (simplified)
+    tempo = torchaudio.functional.estimate_tempo(waveform, sample_rate)
+    speech_rate = tempo / 60 if tempo is not None else 0
+    return f0.numpy(), energy, speech_rate, mfccs, waveform.numpy(), sample_rate
 def analyze_voice_stress(audio_file_path):
+    f0, energy, speech_rate, mfccs, waveform, sample_rate = extract_audio_features(audio_file_path)
+    if len(f0) == 0:
+        raise ValueError("Could not extract fundamental frequency from the audio.")
     mean_f0 = np.mean(f0)
     std_f0 = np.std(f0)
     mean_energy = np.mean(energy)
     # Handle audio file analysis
     if file or file_path:
         if file:
+            if not file.filename.endswith(".wav"):
+                raise HTTPException(status_code=400, detail="Only .wav files are supported.")
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
                 temp_file.write(await file.read())
+                temp_wav_path = temp_file.name
         else:
+            if not file_path.endswith(".wav"):
+                raise HTTPException(status_code=400, detail="Only .wav files are supported.")
             if not os.path.exists(file_path):
                 raise HTTPException(status_code=400, detail="File path does not exist.")
+            temp_wav_path = file_path
         try:
             result = analyze_voice_stress(temp_wav_path)
         finally:
             # Clean up temporary files
             if file:
                 os.remove(temp_wav_path)
     # Handle text analysis
 if __name__ == "__main__":
     import uvicorn
+    port = int(os.getenv("PORT", 7860))  # Listen on $PORT when set; otherwise 7860, the port the Dockerfile exposes
     uvicorn.run("app:app", host="0.0.0.0", port=port, reload=True)

requirements.txt CHANGED Viewed

@@ -1,13 +1,5 @@
 fastapi
 uvicorn
-librosa
 numpy
 pydantic
-soundfile
-pydub
-ffmpeg-python
-python-multipart

 fastapi
 uvicorn
+torchaudio
 numpy
 pydantic