Spaces:

RathodHarish
/

HealthVoiceAnalyzer

Sleeping

App Files Files Community

RathodHarish committed on Jun 26

Commit

72f3531

verified ·

1 Parent(s): 3ade5a3

Update app.py

Browse files

Files changed (1) hide show

app.py +249 -205

app.py CHANGED Viewed

@@ -1,218 +1,262 @@
-<!DOCTYPE html>
-<html lang="en">
-<head>
-  <meta charset="UTF-8">
-  <meta name="viewport" content="width=device-width, initial-scale=1.0">
-  <title>Health Voice Analyzer</title>
-  <script src="https://cdn.jsdelivr.net/npm/react@18/umd/react.development.js"></script>
-  <script src="https://cdn.jsdelivr.net/npm/react-dom@18/umd/react-dom.development.js"></script>
-  <script src="https://cdn.jsdelivr.net/npm/[email protected]/babel.min.js"></script>
-  <script src="https://cdn.tailwindcss.com"></script>
-</head>
-<body>
-  <div id="root"></div>
-  <script type="text/babel">
-    const { useState, useEffect } = React;
-    const HealthVoiceAnalyzer = () => {
-      const [recording, setRecording] = useState(false);
-      const [audioBlob, setAudioBlob] = useState(null);
-      const [transcription, setTranscription] = useState('');
-      const [prediction, setPrediction] = useState('');
-      const [confidence, setConfidence] = useState(0);
-      const [error, setError] = useState('');
-      const [mediaRecorder, setMediaRecorder] = useState(null);
-      const [language, setLanguage] = useState('en');
-      const [query, setQuery] = useState('');
-      const [ttsResponse, setTtsResponse] = useState('');
-      // Initialize Web Speech API for text-to-speech
-      const speak = (text) => {
-        const utterance = new SpeechSynthesisUtterance(text);
-        utterance.lang = language;
-        window.speechSynthesis.speak(utterance);
-      };
-      // Start recording
-      const startRecording = async () => {
-        try {
-          const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-          const recorder = new MediaRecorder(stream);
-          const chunks = [];
-          recorder.ondataavailable = (e) => chunks.push(e.data);
-          recorder.onstop = () => {
-            const blob = new Blob(chunks, { type: 'audio/wav' });
-            setAudioBlob(blob);
-          };
-          recorder.start();
-          setMediaRecorder(recorder);
-          setRecording(true);
-          setError('');
-        } catch (err) {
-          setError('Error accessing microphone: ' + err.message);
-        }
-      };
-      // Stop recording
-      const stopRecording = () => {
-        if (mediaRecorder) {
-          mediaRecorder.stop();
-          setRecording(false);
-        }
-      };
-      // Analyze audio
-      const analyzeAudio = async () => {
-        if (!audioBlob) {
-          setError('No audio recorded.');
-          return;
-        }
-        setError('');
-        setTranscription('');
-        setPrediction('');
-        setConfidence(0);
-        // Simulate API call to backend (replace with actual Salesforce Apex endpoint)
-        const formData = new FormData();
-        formData.append('audio', audioBlob, 'recording.wav');
-        formData.append('language', language);
-        try {
-          // Placeholder for API call to Salesforce backend or Hugging Face endpoint
-          const response = await fetch('/api/analyze-voice', {
-            method: 'POST',
-            body: formData,
-            headers: { 'Accept-Language': language }
-          });
-          const data = await response.json();
-          if (data.error) {
-            setError(data.error);
-            speak(data.error);
-          } else {
-            setTranscription(data.transcription || 'No transcription available.');
-            setPrediction(data.prediction || 'No health condition predicted.');
-            setConfidence(data.confidence || 0);
-            const feedback = data.prediction === 'No health condition predicted'
-              ? 'No significant health indicators detected.'
-              : `Possible health condition: ${data.prediction} (confidence: ${data.confidence.toFixed(4)}). Consult a doctor.`;
-            const fullFeedback = `${feedback}\n\n**Disclaimer**: This is not a diagnostic tool. Consult a healthcare provider for medical advice.`;
-            speak(fullFeedback);
-          }
-        } catch (err) {
-          setError('Error analyzing audio: ' + err.message);
-          speak('Error analyzing audio.');
-        }
-      };
-      // Handle health query
-      const handleQuery = async () => {
-        if (!query) {
-          setError('Please enter a health query.');
-          return;
-        }
-        // Simulate Q&A response (replace with actual API call)
-        const response = `Sample response for query: "${query}". For accurate information, consult a healthcare provider.`;
-        setTtsResponse(response);
-        speak(response);
-      };
-      return (
-        <div className="min-h-screen bg-gray-100 flex flex-col items-center justify-center p-4">
-          <div className="bg-white shadow-lg rounded-lg p-8 max-w-2xl w-full">
-            <h1 className="text-3xl font-bold text-center text-blue-600 mb-6">
-              Health Voice Analyzer
-            </h1>
-            <p className="text-gray-600 text-center mb-4">
-              Record or upload a voice sample describing symptoms in English, Spanish, Hindi, or Mandarin.
-              For example, say "I have a fever" or ask a health question.
-            </p>
-            {/* Language Selection */}
-            <div className="mb-4">
-              <label className="block text-gray-700 font-semibold mb-2">Select Language</label>
-              <select
-                value={language}
-                onChange={(e) => setLanguage(e.target.value)}
-                className="w-full p-2 border rounded-md"
-              >
-                <option value="en">English</option>
-                <option value="es">Spanish</option>
-                <option value="hi">Hindi</option>
-                <option value="zh">Mandarin</option>
-              </select>
-            </div>
-            {/* Voice Recording */}
-            <div className="mb-6">
-              <button
-                onClick={recording ? stopRecording : startRecording}
-                className={`w-full py-3 rounded-md text-white font-semibold ${
-                  recording ? 'bg-red-500 hover:bg-red-600' : 'bg-blue-500 hover:bg-blue-600'
-                }`}
-              >
-                {recording ? 'Stop Recording' : 'Start Recording'}
-              </button>
-            </div>
-            {audioBlob && (
-              <button
-                onClick={analyzeAudio}
-                className="w-full py-3 bg-green-500 hover:bg-green-600 text-white font-semibold rounded-md mb-4"
-              >
-                Analyze Audio
-              </button>
-            )}
-            {/* Health Query Input */}
-            <div className="mb-6">
-              <label className="block text-gray-700 font-semibold mb-2">Ask a Health Question</label>
-              <input
-                type="text"
-                value={query}
-                onChange={(e) => setQuery(e.target.value)}
-                placeholder="E.g., What are symptoms of asthma?"
-                className="w-full p-2 border rounded-md mb-2"
-              />
-              <button
-                onClick={handleQuery}
-                className="w-full py-3 bg-purple-500 hover:bg-purple-600 text-white font-semibold rounded-md"
-              >
-                Submit Query
-              </button>
-            </div>
-            {/* Output */}
-            {error && <p className="text-red-500 mb-4">{error}</p>}
-            {transcription && (
-              <div className="mb-4">
-                <h2 className="text-xl font-semibold text-gray-700">Transcription</h2>
-                <p className="text-gray-600">{transcription}</p>
-              </div>
-            )}
-            {prediction && (
-              <div className="mb-4">
-                <h2 className="text-xl font-semibold text-gray-700">Health Assessment</h2>
-                <p className="text-gray-600">
-                  {prediction === 'No health condition predicted'
-                    ? 'No significant health indicators detected.'
-                    : `Possible health condition: ${prediction} (confidence: ${confidence.toFixed(4)}). Consult a doctor.`}
-                </p>
-                <p className="text-gray-500 text-sm mt-2">
-                  **Disclaimer**: This is not a diagnostic tool. Consult a healthcare provider for medical advice.
-                </p>
-              </div>
-            )}
-            {ttsResponse && (
-              <div className="mb-4">
-                <h2 className="text-xl font-semibold text-gray-700">Query Response</h2>
-                <p className="text-gray-600">{ttsResponse}</p>
-              </div>
-            )}
-          </div>
-        </div>
-      );
-    };
-    ReactDOM.render(<HealthVoiceAnalyzer />, document.getElementById('root'));
-  </script>
-</body>
-</html>

+import gradio as gr
+import librosa
+import numpy as np
+import os
+import hashlib
+from datetime import datetime
+import soundfile as sf
+import torch
+from tenacity import retry, stop_after_attempt, wait_fixed
+import pyttsx3
+from transformers import pipeline
+# Initialize text-to-speech engine
+tts_engine = pyttsx3.init()
+tts_engine.setProperty('rate', 150)
+# Initialize local models with retry logic
+@retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
+def load_whisper_model():
+    try:
+        model = pipeline(
+            "automatic-speech-recognition",
+            model="openai/whisper-tiny",  # Multilingual model
+            device=-1,  # CPU; use device=0 for GPU if available
+            model_kwargs={"use_safetensors": True}
+        )
+        print("Whisper model loaded successfully.")
+        return model
+    except Exception as e:
+        print(f"Failed to load Whisper model: {str(e)}")
+        raise
+@retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
+def load_symptom_model():
+    try:
+        model = pipeline(
+            "text-classification",
+            model="abhirajeshbhai/symptom-2-disease-net",
+            device=-1,  # CPU
+            model_kwargs={"use_safetensors": True}
+        )
+        print("Symptom-2-Disease model loaded successfully.")
+        return model
+    except Exception as e:
+        print(f"Failed to load Symptom-2-Disease model: {str(e)}")
+        # Fallback to a generic model
+        try:
+            model = pipeline(
+                "text-classification",
+                model="distilbert-base-uncased",
+                device=-1
+            )
+            print("Fallback to distilbert-base-uncased model.")
+            return model
+        except Exception as fallback_e:
+            print(f"Fallback model failed: {str(fallback_e)}")
+            raise
+whisper = None
+symptom_classifier = None
+is_fallback_model = False
+try:
+    whisper = load_whisper_model()
+except Exception as e:
+    print(f"Whisper model initialization failed after retries: {str(e)}")
+try:
+    symptom_classifier = load_symptom_model()
+except Exception as e:
+    print(f"Symptom model initialization failed after retries: {str(e)}")
+    symptom_classifier = None
+    is_fallback_model = True
+def compute_file_hash(file_path):
+    """Compute MD5 hash of a file to check uniqueness."""
+    hash_md5 = hashlib.md5()
+    with open(file_path, "rb") as f:
+        for chunk in iter(lambda: f.read(4096), b""):
+            hash_md5.update(chunk)
+    return hash_md5.hexdigest()
+def transcribe_audio(audio_file, language="en"):
+    """Transcribe audio using local Whisper model."""
+    if not whisper:
+        return "Error: Whisper model not loaded. Check logs for details or ensure sufficient compute resources."
+    try:
+        # Load and validate audio
+        audio, sr = librosa.load(audio_file, sr=16000)
+        if len(audio) < 1600:  # Less than 0.1s
+            return "Error: Audio too short. Please provide audio of at least 1 second."
+        if np.max(np.abs(audio)) < 1e-4:  # Too quiet
+            return "Error: Audio too quiet. Please provide clear audio describing symptoms."
+        # Save as WAV for Whisper
+        temp_wav = f"/tmp/{os.path.basename(audio_file)}.wav"
+        sf.write(temp_wav, audio, sr)
+        # Transcribe with beam search and language
+        with torch.no_grad():
+            result = whisper(temp_wav, generate_kwargs={"num_beams": 5, "language": language})
+        transcription = result.get("text", "").strip()
+        print(f"Transcription: {transcription}")
+        # Clean up temp file
+        try:
+            os.remove(temp_wav)
+        except Exception:
+            pass
+        if not transcription:
+            return "Transcription empty. Please provide clear audio describing symptoms."
+        # Check for repetitive transcription
+        words = transcription.split()
+        if len(words) > 5 and len(set(words)) < len(words) / 2:
+            return "Error: Transcription appears repetitive. Please provide clear, non-repetitive audio describing symptoms."
+        return transcription
+    except Exception as e:
+        return f"Error transcribing audio: {str(e)}"
+def analyze_symptoms(text):
+    """Analyze symptoms using local Symptom-2-Disease model."""
+    if not symptom_classifier:
+        return "Error: Symptom-2-Disease model not loaded. Check logs for details or ensure sufficient compute resources.", 0.0
+    try:
+        if not text or "Error transcribing" in text:
+            return "No valid transcription for analysis.", 0.0
+        with torch.no_grad():
+            result = symptom_classifier(text)
+        if result and isinstance(result, list) and len(result) > 0:
+            prediction = result[0]["label"]
+            score = result[0]["score"]
+            if is_fallback_model:
+                print("Warning: Using fallback model (distilbert-base-uncased). Results may be less accurate.")
+                prediction = f"{prediction} (using fallback model)"
+            print(f"Health Prediction: {prediction}, Score: {score:.4f}")
+            return prediction, score
+        return "No health condition predicted", 0.0
+    except Exception as e:
+        return f"Error analyzing symptoms: {str(e)}", 0.0
+def handle_health_query(query, language="en"):
+    """Handle health-related queries with a simple response."""
+    if not query:
+        return "Please provide a valid health query."
+    # Placeholder for Q&A logic (could integrate a model like BERT for Q&A)
+    response = f"Response to query '{query}': For accurate health information, consult a healthcare provider."
+    # Text-to-speech
+    tts_engine.setProperty('voice', language)
+    tts_engine.say(response)
+    tts_engine.runAndWait()
+    return response
+def analyze_voice(audio_file, language="en"):
+    """Analyze voice for health indicators."""
+    try:
+        # Ensure unique file name
+        unique_path = f"/tmp/gradio/{datetime.now().strftime('%Y%m%d%H%M%S%f')}_{os.path.basename(audio_file)}"
+        os.rename(audio_file, unique_path)
+        audio_file = unique_path
+        # Log audio file info
+        file_hash = compute_file_hash(audio_file)
+        print(f"Processing audio file: {audio_file}, Hash: {file_hash}")
+        # Load audio to verify format
+        audio, sr = librosa.load(audio_file, sr=16000)
+        print(f"Audio shape: {audio.shape}, Sampling rate: {sr}, Duration: {len(audio)/sr:.2f}s, Mean: {np.mean(audio):.4f}, Std: {np.std(audio):.4f}")
+        # Transcribe audio
+        transcription = transcribe_audio(audio_file, language)
+        if "Error transcribing" in transcription:
+            tts_engine.say(transcription)
+            tts_engine.runAndWait()
+            return transcription
+        # Check for medication-related queries
+        if "medicine" in transcription.lower() or "treatment" in transcription.lower():
+            feedback = "Error: This tool does not provide medication or treatment advice. Please describe symptoms only (e.g., 'I have a fever')."
+            feedback += f"\n\n**Debug Info**: Transcription = '{transcription}', File Hash = {file_hash}"
+            feedback += "\n**Disclaimer**: This is not a diagnostic tool. Consult a healthcare provider for medical advice."
+            tts_engine.say(feedback)
+            tts_engine.runAndWait()
+            return feedback
+        # Analyze symptoms
+        prediction, score = analyze_symptoms(transcription)
+        if "Error analyzing" in prediction:
+            tts_engine.say(prediction)
+            tts_engine.runAndWait()
+            return prediction
+        # Generate feedback
+        if prediction == "No health condition predicted":
+            feedback = "No significant health indicators detected."
+        else:
+            feedback = f"Possible health condition: {prediction} (confidence: {score:.4f}). Consult a doctor."
+        feedback += f"\n\n**Debug Info**: Transcription = '{transcription}', Prediction = {prediction}, Confidence = {score:.4f}, File Hash = {file_hash}"
+        feedback += "\n**Disclaimer**: This is not a diagnostic tool. Consult a healthcare provider for medical advice."
+        # Text-to-speech for feedback
+        tts_engine.say(feedback)
+        tts_engine.runAndWait()
+        # Clean up temporary audio file
+        try:
+            os.remove(audio_file)
+            print(f"Deleted temporary audio file: {audio_file}")
+        except Exception as e:
+            print(f"Failed to delete audio file: {str(e)}")
+        return feedback
+    except Exception as e:
+        error_msg = f"Error processing audio: {str(e)}"
+        tts_engine.say(error_msg)
+        tts_engine.runAndWait()
+        return error_msg
+# Gradio interface
+def create_gradio_interface():
+    with gr.Blocks(theme=gr.themes.Soft()) as demo:
+        gr.Markdown(
+            """
+            # Health Voice Analyzer
+            Record or upload a voice sample describing symptoms in English, Spanish, Hindi, or Mandarin (e.g., 'I have a fever').
+            Ask health questions in the text box below. Supports WAV, 16kHz audio.
+            **Disclaimer**: This is not a diagnostic tool. Consult a healthcare provider for medical advice.
+            """
+        )
+        with gr.Row():
+            language = gr.Dropdown(
+                choices=["en", "es", "hi", "zh"],
+                label="Select Language",
+                value="en"
+            )
+        with gr.Row():
+            audio_input = gr.Audio(type="filepath", label="Record or Upload Voice")
+        with gr.Row():
+            query_input = gr.Textbox(label="Ask a Health Question (e.g., 'What are symptoms of asthma?')")
+        with gr.Row():
+            output = gr.Textbox(label="Health Assessment Feedback")
+        with gr.Row():
+            analyze_button = gr.Button("Analyze Voice")
+            query_button = gr.Button("Submit Query")
+        analyze_button.click(
+            fn=analyze_voice,
+            inputs=[audio_input, language],
+            outputs=output
+        )
+        query_button.click(
+            fn=handle_health_query,
+            inputs=[query_input, language],
+            outputs=output
+        )
+    return demo
+# Script entry point: build the interface and serve it on all network
+# interfaces (0.0.0.0) at port 7860.
+if __name__ == "__main__":
+    demo = create_gradio_interface()
+    demo.launch(server_name="0.0.0.0", server_port=7860)