Spaces:

vitorcalvi
/

aig2

Sleeping

App Files Community

vitorcalvi committed on Nov 13, 2024

Commit

2f6587c

1 Parent(s): 4143e48

1

Browse files

Files changed (1) hide show

app.py +16 -30

app.py CHANGED Viewed

@@ -7,7 +7,6 @@ import tempfile
 import os
 import warnings
 from pydub import AudioSegment
-from datetime import datetime
 import time
 warnings.filterwarnings("ignore")
@@ -15,28 +14,19 @@ warnings.filterwarnings("ignore")
 app = FastAPI()
 def convert_mp3_to_wav(mp3_path):
-    # Convert MP3 to WAV
     sound = AudioSegment.from_mp3(mp3_path)
     wav_path = mp3_path.replace(".mp3", ".wav")
     sound.export(wav_path, format="wav")
     return wav_path
 def extract_audio_features(audio_file_path):
-    # Load the audio file using soundfile
     waveform, sample_rate = sf.read(audio_file_path)
-    # Ensure waveform is a 1D array (mono audio)
     if waveform.ndim > 1:
         waveform = waveform.mean(axis=1)
-    # Calculate basic features (pitch estimation requires a more complex algorithm, but we'll simplify)
     energy = np.mean(waveform ** 2)
-    mfccs = np.mean(np.abs(np.fft.fft(waveform)[:13]), axis=0)  # Simplified MFCC-like features
-    # Placeholder for speech rate and fundamental frequency
-    speech_rate = 4.0  # Arbitrary placeholder value for speech rate
-    f0 = np.mean(np.abs(np.diff(waveform))) * sample_rate / (2 * np.pi)  # Rough pitch estimate
     return f0, energy, speech_rate, mfccs, waveform, sample_rate
 def analyze_voice_stress(audio_file_path):
@@ -54,7 +44,7 @@ def analyze_voice_stress(audio_file_path):
     z_energy = (mean_energy - norm_mean_energy) / norm_std_energy
     z_speech_rate = (speech_rate - norm_speech_rate) / norm_std_speech_rate
     stress_score = (0.4 * z_f0) + (0.4 * z_speech_rate) + (0.2 * z_energy)
-    stress_level = round(float(1 / (1 + np.exp(-stress_score)) * 100), 2)  # Rounded to 2 decimal places
     categories = ["Very Low Stress", "Low Stress", "Moderate Stress", "High Stress", "Very High Stress"]
     category_idx = min(int(stress_level / 20), 4)
     stress_category = categories[category_idx]
@@ -72,9 +62,9 @@ def analyze_text_stress(text: str):
 class StressResponse(BaseModel):
     stress_level: float
     category: str
-    gender: str = None  # Optional, only for audio analysis
-    status_code: int
-    processing_time_ms: int
     size: str
 @app.post("/analyze-stress/", response_model=StressResponse)
@@ -88,7 +78,6 @@ async def analyze_stress(
     start_time = time.time()
-    # Handle audio file analysis
     if file or file_path:
         if file:
             if not (file.filename.endswith(".wav") or file.filename.endswith(".mp3")):
@@ -105,38 +94,35 @@ async def analyze_stress(
             temp_audio_path = file_path
             file_size = os.path.getsize(file_path)
-        # Convert MP3 to WAV if needed
         if temp_audio_path.endswith(".mp3"):
             temp_audio_path = convert_mp3_to_wav(temp_audio_path)
         try:
             result = analyze_voice_stress(temp_audio_path)
-            processing_time_ms = int((time.time() - start_time) * 1000)  # Calculate time in ms
             result.update({
-                "status_code": 200,
-                "processing_time_ms": processing_time_ms,
-                "size": f"{round(file_size / 1024, 2)} KB"  # Convert size to KB and round to 2 decimal places
             })
             return JSONResponse(content=result, status_code=200)
         except Exception as e:
             raise HTTPException(status_code=500, detail=str(e))
         finally:
-            # Clean up temporary files
             if file:
                 os.remove(temp_audio_path)
-    # Handle text analysis
     elif text:
         result = analyze_text_stress(text)
-        processing_time_ms = int((time.time() - start_time) * 1000)  # Calculate time in ms
         result.update({
-            "status_code": 200,
-            "processing_time_ms": processing_time_ms,
-            "size": "N/A"  # Size is not applicable for text input
         })
         return JSONResponse(content=result, status_code=200)
 if __name__ == "__main__":
     import uvicorn
-    port = int(os.getenv("PORT", 7860))  # Use the PORT environment variable if needed
     uvicorn.run("app:app", host="0.0.0.0", port=port, reload=True)

 import os
 import warnings
 from pydub import AudioSegment
 import time
 warnings.filterwarnings("ignore")
 app = FastAPI()
 def convert_mp3_to_wav(mp3_path):
     """Convert an MP3 file to WAV and return the path of the WAV file.

     The WAV file is written next to the source file, with the trailing
     extension swapped for ``.wav``. Only the final extension is replaced,
     so a ``.mp3`` substring elsewhere in the path (for example in a
     directory name) is left untouched.

     Args:
         mp3_path: Path of the MP3 file to decode.

     Returns:
         Path of the exported WAV file.
     """
     sound = AudioSegment.from_mp3(mp3_path)
     # str.replace would rewrite every ".mp3" occurrence in the path,
     # not just the suffix, so split off the real extension instead.
     stem, _ = os.path.splitext(mp3_path)
     wav_path = stem + ".wav"
     sound.export(wav_path, format="wav")
     return wav_path
 def extract_audio_features(audio_file_path):
     waveform, sample_rate = sf.read(audio_file_path)
     if waveform.ndim > 1:
         waveform = waveform.mean(axis=1)
     energy = np.mean(waveform ** 2)
+    mfccs = np.mean(np.abs(np.fft.fft(waveform)[:13]), axis=0)
+    speech_rate = 4.0
+    f0 = np.mean(np.abs(np.diff(waveform))) * sample_rate / (2 * np.pi)
     return f0, energy, speech_rate, mfccs, waveform, sample_rate
 def analyze_voice_stress(audio_file_path):
     z_energy = (mean_energy - norm_mean_energy) / norm_std_energy
     z_speech_rate = (speech_rate - norm_speech_rate) / norm_std_speech_rate
     stress_score = (0.4 * z_f0) + (0.4 * z_speech_rate) + (0.2 * z_energy)
+    stress_level = round(float(1 / (1 + np.exp(-stress_score)) * 100), 2)
     categories = ["Very Low Stress", "Low Stress", "Moderate Stress", "High Stress", "Very High Stress"]
     category_idx = min(int(stress_level / 20), 4)
     stress_category = categories[category_idx]
 class StressResponse(BaseModel):
     stress_level: float
     category: str
+    gender: str = None
+    status: str
+    time: str
     size: str
 @app.post("/analyze-stress/", response_model=StressResponse)
     start_time = time.time()
     if file or file_path:
         if file:
             if not (file.filename.endswith(".wav") or file.filename.endswith(".mp3")):
             temp_audio_path = file_path
             file_size = os.path.getsize(file_path)
         if temp_audio_path.endswith(".mp3"):
             temp_audio_path = convert_mp3_to_wav(temp_audio_path)
         try:
             result = analyze_voice_stress(temp_audio_path)
+            processing_time_ms = int((time.time() - start_time) * 1000)
             result.update({
+                "status": "200 (OK)",
+                "time": f"{processing_time_ms} ms",
+                "size": f"{round(file_size / 1024, 2)} KB"
             })
             return JSONResponse(content=result, status_code=200)
         except Exception as e:
             raise HTTPException(status_code=500, detail=str(e))
         finally:
             if file:
                 os.remove(temp_audio_path)
     elif text:
         result = analyze_text_stress(text)
+        processing_time_ms = int((time.time() - start_time) * 1000)
         result.update({
+            "status": "200 (OK)",
+            "time": f"{processing_time_ms} ms",
+            "size": "N/A"
         })
         return JSONResponse(content=result, status_code=200)
 if __name__ == "__main__":
     import uvicorn
+    port = int(os.getenv("PORT", 7860))
     uvicorn.run("app:app", host="0.0.0.0", port=port, reload=True)