RathodHarish committed on
Commit
72f3531
·
verified ·
1 Parent(s): 3ade5a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +249 -205
app.py CHANGED
@@ -1,218 +1,262 @@
1
- <!DOCTYPE html>
2
- <html lang="en">
3
- <head>
4
- <meta charset="UTF-8">
5
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>Health Voice Analyzer</title>
7
- <script src="https://cdn.jsdelivr.net/npm/react@18/umd/react.development.js"></script>
8
- <script src="https://cdn.jsdelivr.net/npm/react-dom@18/umd/react-dom.development.js"></script>
9
- <script src="https://cdn.jsdelivr.net/npm/[email protected]/babel.min.js"></script>
10
- <script src="https://cdn.tailwindcss.com"></script>
11
- </head>
12
- <body>
13
- <div id="root"></div>
14
- <script type="text/babel">
15
- const { useState, useEffect } = React;
16
 
17
- const HealthVoiceAnalyzer = () => {
18
- const [recording, setRecording] = useState(false);
19
- const [audioBlob, setAudioBlob] = useState(null);
20
- const [transcription, setTranscription] = useState('');
21
- const [prediction, setPrediction] = useState('');
22
- const [confidence, setConfidence] = useState(0);
23
- const [error, setError] = useState('');
24
- const [mediaRecorder, setMediaRecorder] = useState(null);
25
- const [language, setLanguage] = useState('en');
26
- const [query, setQuery] = useState('');
27
- const [ttsResponse, setTtsResponse] = useState('');
28
 
29
- // Initialize Web Speech API for text-to-speech
30
- const speak = (text) => {
31
- const utterance = new SpeechSynthesisUtterance(text);
32
- utterance.lang = language;
33
- window.speechSynthesis.speak(utterance);
34
- };
 
 
 
 
 
 
 
 
 
35
 
36
- // Start recording
37
- const startRecording = async () => {
38
- try {
39
- const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
40
- const recorder = new MediaRecorder(stream);
41
- const chunks = [];
42
- recorder.ondataavailable = (e) => chunks.push(e.data);
43
- recorder.onstop = () => {
44
- const blob = new Blob(chunks, { type: 'audio/wav' });
45
- setAudioBlob(blob);
46
- };
47
- recorder.start();
48
- setMediaRecorder(recorder);
49
- setRecording(true);
50
- setError('');
51
- } catch (err) {
52
- setError('Error accessing microphone: ' + err.message);
53
- }
54
- };
 
 
 
 
 
 
55
 
56
- // Stop recording
57
- const stopRecording = () => {
58
- if (mediaRecorder) {
59
- mediaRecorder.stop();
60
- setRecording(false);
61
- }
62
- };
63
 
64
- // Analyze audio
65
- const analyzeAudio = async () => {
66
- if (!audioBlob) {
67
- setError('No audio recorded.');
68
- return;
69
- }
70
- setError('');
71
- setTranscription('');
72
- setPrediction('');
73
- setConfidence(0);
74
 
75
- // Simulate API call to backend (replace with actual Salesforce Apex endpoint)
76
- const formData = new FormData();
77
- formData.append('audio', audioBlob, 'recording.wav');
78
- formData.append('language', language);
 
 
79
 
80
- try {
81
- // Placeholder for API call to Salesforce backend or Hugging Face endpoint
82
- const response = await fetch('/api/analyze-voice', {
83
- method: 'POST',
84
- body: formData,
85
- headers: { 'Accept-Language': language }
86
- });
87
- const data = await response.json();
88
- if (data.error) {
89
- setError(data.error);
90
- speak(data.error);
91
- } else {
92
- setTranscription(data.transcription || 'No transcription available.');
93
- setPrediction(data.prediction || 'No health condition predicted.');
94
- setConfidence(data.confidence || 0);
95
- const feedback = data.prediction === 'No health condition predicted'
96
- ? 'No significant health indicators detected.'
97
- : `Possible health condition: ${data.prediction} (confidence: ${data.confidence.toFixed(4)}). Consult a doctor.`;
98
- const fullFeedback = `${feedback}\n\n**Disclaimer**: This is not a diagnostic tool. Consult a healthcare provider for medical advice.`;
99
- speak(fullFeedback);
100
- }
101
- } catch (err) {
102
- setError('Error analyzing audio: ' + err.message);
103
- speak('Error analyzing audio.');
104
- }
105
- };
106
 
107
- // Handle health query
108
- const handleQuery = async () => {
109
- if (!query) {
110
- setError('Please enter a health query.');
111
- return;
112
- }
113
- // Simulate Q&A response (replace with actual API call)
114
- const response = `Sample response for query: "${query}". For accurate information, consult a healthcare provider.`;
115
- setTtsResponse(response);
116
- speak(response);
117
- };
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
- return (
120
- <div className="min-h-screen bg-gray-100 flex flex-col items-center justify-center p-4">
121
- <div className="bg-white shadow-lg rounded-lg p-8 max-w-2xl w-full">
122
- <h1 className="text-3xl font-bold text-center text-blue-600 mb-6">
123
- Health Voice Analyzer
124
- </h1>
125
- <p className="text-gray-600 text-center mb-4">
126
- Record or upload a voice sample describing symptoms in English, Spanish, Hindi, or Mandarin.
127
- For example, say "I have a fever" or ask a health question.
128
- </p>
 
 
 
 
 
 
 
 
 
 
129
 
130
- {/* Language Selection */}
131
- <div className="mb-4">
132
- <label className="block text-gray-700 font-semibold mb-2">Select Language</label>
133
- <select
134
- value={language}
135
- onChange={(e) => setLanguage(e.target.value)}
136
- className="w-full p-2 border rounded-md"
137
- >
138
- <option value="en">English</option>
139
- <option value="es">Spanish</option>
140
- <option value="hi">Hindi</option>
141
- <option value="zh">Mandarin</option>
142
- </select>
143
- </div>
144
 
145
- {/* Voice Recording */}
146
- <div className="mb-6">
147
- <button
148
- onClick={recording ? stopRecording : startRecording}
149
- className={`w-full py-3 rounded-md text-white font-semibold ${
150
- recording ? 'bg-red-500 hover:bg-red-600' : 'bg-blue-500 hover:bg-blue-600'
151
- }`}
152
- >
153
- {recording ? 'Stop Recording' : 'Start Recording'}
154
- </button>
155
- </div>
156
- {audioBlob && (
157
- <button
158
- onClick={analyzeAudio}
159
- className="w-full py-3 bg-green-500 hover:bg-green-600 text-white font-semibold rounded-md mb-4"
160
- >
161
- Analyze Audio
162
- </button>
163
- )}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
 
165
- {/* Health Query Input */}
166
- <div className="mb-6">
167
- <label className="block text-gray-700 font-semibold mb-2">Ask a Health Question</label>
168
- <input
169
- type="text"
170
- value={query}
171
- onChange={(e) => setQuery(e.target.value)}
172
- placeholder="E.g., What are symptoms of asthma?"
173
- className="w-full p-2 border rounded-md mb-2"
174
- />
175
- <button
176
- onClick={handleQuery}
177
- className="w-full py-3 bg-purple-500 hover:bg-purple-600 text-white font-semibold rounded-md"
178
- >
179
- Submit Query
180
- </button>
181
- </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
 
183
- {/* Output */}
184
- {error && <p className="text-red-500 mb-4">{error}</p>}
185
- {transcription && (
186
- <div className="mb-4">
187
- <h2 className="text-xl font-semibold text-gray-700">Transcription</h2>
188
- <p className="text-gray-600">{transcription}</p>
189
- </div>
190
- )}
191
- {prediction && (
192
- <div className="mb-4">
193
- <h2 className="text-xl font-semibold text-gray-700">Health Assessment</h2>
194
- <p className="text-gray-600">
195
- {prediction === 'No health condition predicted'
196
- ? 'No significant health indicators detected.'
197
- : `Possible health condition: ${prediction} (confidence: ${confidence.toFixed(4)}). Consult a doctor.`}
198
- </p>
199
- <p className="text-gray-500 text-sm mt-2">
200
- **Disclaimer**: This is not a diagnostic tool. Consult a healthcare provider for medical advice.
201
- </p>
202
- </div>
203
- )}
204
- {ttsResponse && (
205
- <div className="mb-4">
206
- <h2 className="text-xl font-semibold text-gray-700">Query Response</h2>
207
- <p className="text-gray-600">{ttsResponse}</p>
208
- </div>
209
- )}
210
- </div>
211
- </div>
212
- );
213
- };
214
-
215
- ReactDOM.render(<HealthVoiceAnalyzer />, document.getElementById('root'));
216
- </script>
217
- </body>
218
- </html>
 
1
+ import gradio as gr
2
+ import librosa
3
+ import numpy as np
4
+ import os
5
+ import hashlib
6
+ from datetime import datetime
7
+ import soundfile as sf
8
+ import torch
9
+ from tenacity import retry, stop_after_attempt, wait_fixed
10
+ import pyttsx3
11
+ from transformers import pipeline
 
 
 
 
12
 
13
# Initialize text-to-speech engine
class _SilentTTS:
    """No-op stand-in used when no speech driver can be initialised.

    Implements only the three engine methods this module calls, so the rest
    of the file can keep using ``tts_engine`` unconditionally.
    """

    def setProperty(self, name, value):
        """Accept and ignore an engine property."""

    def say(self, text):
        """Accept and discard text that would have been spoken."""

    def runAndWait(self):
        """Return immediately; nothing is queued."""


try:
    tts_engine = pyttsx3.init()
    tts_engine.setProperty('rate', 150)
except Exception as e:
    # Speech output is auxiliary: every handler also returns its text, so a
    # missing or broken speech driver must not stop the app from starting.
    print(f"Text-to-speech unavailable, continuing without speech output: {str(e)}")
    tts_engine = _SilentTTS()
 
 
 
 
 
 
 
 
16
 
17
# Initialize local models with retry logic
@retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
def load_whisper_model():
    """Build and return the Whisper speech-recognition pipeline.

    A failure is logged and re-raised so that the ``@retry`` decorator
    (up to 3 attempts, 2 seconds apart) can attempt the load again.
    """
    whisper_settings = {
        "model": "openai/whisper-tiny",  # Multilingual model
        "device": -1,  # CPU; use device=0 for GPU if available
        "model_kwargs": {"use_safetensors": True},
    }
    try:
        asr_pipeline = pipeline("automatic-speech-recognition", **whisper_settings)
        print("Whisper model loaded successfully.")
        return asr_pipeline
    except Exception as load_error:
        print(f"Failed to load Whisper model: {str(load_error)}")
        raise
32
 
33
@retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
def load_symptom_model():
    """Load the symptom classifier, falling back to a generic model.

    The Symptom-2-Disease model is tried first. If it cannot be loaded, a
    plain ``distilbert-base-uncased`` text-classification pipeline is
    returned instead and the module-level ``is_fallback_model`` flag is set,
    so that ``analyze_symptoms`` can label its predictions accordingly.

    Returns:
        The loaded ``transformers`` text-classification pipeline.

    Raises:
        Exception: whatever the fallback load raised, when both loads fail.
            The ``@retry`` decorator then repeats the whole procedure
            (up to 3 attempts, 2 seconds apart).
    """
    global is_fallback_model

    try:
        model = pipeline(
            "text-classification",
            model="abhirajeshbhai/symptom-2-disease-net",
            device=-1,  # CPU
            model_kwargs={"use_safetensors": True}
        )
        print("Symptom-2-Disease model loaded successfully.")
        # Reset in case an earlier retry attempt got as far as the fallback.
        is_fallback_model = False
        return model
    except Exception as e:
        print(f"Failed to load Symptom-2-Disease model: {str(e)}")

    # Fallback to a generic model
    try:
        model = pipeline(
            "text-classification",
            model="distilbert-base-uncased",
            device=-1
        )
    except Exception as fallback_e:
        print(f"Fallback model failed: {str(fallback_e)}")
        raise

    # Record that predictions now come from the generic model; previously the
    # flag was never set on this path, so the warning was never shown.
    is_fallback_model = True
    print("Fallback to distilbert-base-uncased model.")
    return model
58
 
59
# Module-level model handles; None means the model is unavailable.
whisper = symptom_classifier = None
is_fallback_model = False

# Load the speech-recognition model; on failure the handle stays None.
try:
    whisper = load_whisper_model()
except Exception as load_error:
    print(f"Whisper model initialization failed after retries: {str(load_error)}")

# Load the symptom classifier.
# NOTE(review): the flag is raised here when loading failed entirely, i.e.
# when there is no classifier at all — confirm that is the intended meaning.
try:
    symptom_classifier = load_symptom_model()
except Exception as load_error:
    print(f"Symptom model initialization failed after retries: {str(load_error)}")
    symptom_classifier, is_fallback_model = None, True
74
 
75
def compute_file_hash(file_path, chunk_size=65536):
    """Compute MD5 hash of a file to check uniqueness.

    The file is read in fixed-size chunks so that large recordings are never
    held in memory as a whole.

    Args:
        file_path: Path of the file to hash.
        chunk_size: Number of bytes read per iteration; must be positive.

    Returns:
        The MD5 digest as a lowercase hexadecimal string.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
        OSError: If the file cannot be opened or read.
    """
    if chunk_size <= 0:
        # read(0) returns b"" immediately, which would end the loop at once
        # and silently yield the digest of an empty file.
        raise ValueError("chunk_size must be a positive integer")

    hash_md5 = hashlib.md5()
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
def transcribe_audio(audio_file, language="en"):
    """Transcribe audio using local Whisper model.

    The audio is loaded at 16 kHz, checked for minimum length and loudness,
    written to a temporary WAV file and passed to the Whisper pipeline.

    Args:
        audio_file: Path of the audio file to transcribe.
        language: Language code forwarded to Whisper's ``generate_kwargs``.

    Returns:
        The transcription text on success. Otherwise a human-readable status
        string beginning with "Error" or "Transcription empty"; this function
        does not raise.
    """
    import tempfile  # only needed here; keeps the file-level imports untouched

    if not whisper:
        return "Error: Whisper model not loaded. Check logs for details or ensure sufficient compute resources."

    temp_wav = None
    try:
        # Load and validate audio
        audio, sr = librosa.load(audio_file, sr=16000)
        # NOTE(review): the threshold is 0.1 s of samples but the message asks
        # for at least 1 second — confirm which of the two is intended.
        if len(audio) < 1600:  # Less than 0.1s
            return "Error: Audio too short. Please provide audio of at least 1 second."
        if np.max(np.abs(audio)) < 1e-4:  # Too quiet
            return "Error: Audio too quiet. Please provide clear audio describing symptoms."

        # Save as WAV for Whisper. mkstemp gives a unique, non-guessable
        # name, so concurrent requests with the same upload name cannot clash.
        fd, temp_wav = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        sf.write(temp_wav, audio, sr)

        # Transcribe with beam search and language
        with torch.no_grad():
            result = whisper(temp_wav, generate_kwargs={"num_beams": 5, "language": language})
        transcription = result.get("text", "").strip()
        print(f"Transcription: {transcription}")

        if not transcription:
            return "Transcription empty. Please provide clear audio describing symptoms."
        # Check for repetitive transcription
        words = transcription.split()
        if len(words) > 5 and len(set(words)) < len(words) / 2:
            return "Error: Transcription appears repetitive. Please provide clear, non-repetitive audio describing symptoms."
        return transcription
    except Exception as e:
        return f"Error transcribing audio: {str(e)}"
    finally:
        # Clean up temp file on every path, including when Whisper raised.
        if temp_wav is not None:
            try:
                os.remove(temp_wav)
            except OSError:
                pass  # best effort: a leftover temp file must not mask the result
120
 
121
def analyze_symptoms(text):
    """Analyze symptoms using local Symptom-2-Disease model.

    Args:
        text: Transcribed symptom description. Status messages produced by
            ``transcribe_audio`` are recognised and are not classified.

    Returns:
        A ``(prediction, score)`` tuple. ``prediction`` is the top label from
        the classifier, or an explanatory message with score ``0.0`` when no
        classification was performed. This function does not raise.
    """
    if not symptom_classifier:
        return "Error: Symptom-2-Disease model not loaded. Check logs for details or ensure sufficient compute resources.", 0.0
    try:
        # Every non-transcription message transcribe_audio can return starts
        # with one of these; checking only "Error transcribing" let the others
        # through to the classifier as if they were symptoms.
        status_prefixes = ("Error:", "Error transcribing", "Transcription empty.")
        if (
            not text
            or not text.strip()
            or text.startswith(status_prefixes)
            or "Error transcribing" in text
        ):
            return "No valid transcription for analysis.", 0.0
        with torch.no_grad():
            result = symptom_classifier(text)
        if result and isinstance(result, list) and len(result) > 0:
            prediction = result[0]["label"]
            score = result[0]["score"]
            if is_fallback_model:
                print("Warning: Using fallback model (distilbert-base-uncased). Results may be less accurate.")
                prediction = f"{prediction} (using fallback model)"
            print(f"Health Prediction: {prediction}, Score: {score:.4f}")
            return prediction, score
        return "No health condition predicted", 0.0
    except Exception as e:
        return f"Error analyzing symptoms: {str(e)}", 0.0
141
 
142
def handle_health_query(query, language="en"):
    """Handle health-related queries with a simple response.

    Args:
        query: The question typed by the user.
        language: Language code passed to the TTS engine as its voice.

    Returns:
        The response text, or a prompt asking for a valid query when
        ``query`` is empty or only whitespace.
    """
    if not query or not query.strip():
        return "Please provide a valid health query."
    # Placeholder for Q&A logic (could integrate a model like BERT for Q&A)
    response = f"Response to query '{query}': For accurate health information, consult a healthcare provider."
    # Text-to-speech is best-effort: the text answer is the primary result and
    # must still be returned when speech output fails.
    try:
        tts_engine.setProperty('voice', language)
        tts_engine.say(response)
        tts_engine.runAndWait()
    except Exception as e:
        print(f"Text-to-speech failed: {str(e)}")
    return response
 
 
 
153
 
154
def _speak(text):
    """Speak *text* through the shared TTS engine, logging instead of raising."""
    try:
        tts_engine.say(text)
        tts_engine.runAndWait()
    except Exception as e:
        print(f"Text-to-speech failed: {str(e)}")


def analyze_voice(audio_file, language="en"):
    """Analyze voice for health indicators.

    The uploaded file is moved to a uniquely named path, transcribed, and the
    transcription is classified. The resulting message is also spoken.

    Args:
        audio_file: Path of the recorded or uploaded audio; may be ``None``
            when nothing was provided.
        language: Language code forwarded to ``transcribe_audio``.

    Returns:
        The feedback text, or an error/status message. This function does not
        raise; failures are reported in the returned string.
    """
    import shutil  # only needed here; keeps the file-level imports untouched

    if not audio_file:
        error_msg = "Error processing audio: no audio provided. Please record or upload a voice sample."
        _speak(error_msg)
        return error_msg

    moved_path = None  # set once the upload has been moved, so cleanup knows what to delete
    try:
        # Ensure unique file name. The directory may not exist yet, and
        # shutil.move also works when source and target are on different filesystems.
        os.makedirs("/tmp/gradio", exist_ok=True)
        unique_path = f"/tmp/gradio/{datetime.now().strftime('%Y%m%d%H%M%S%f')}_{os.path.basename(audio_file)}"
        shutil.move(audio_file, unique_path)
        audio_file = unique_path
        moved_path = unique_path

        # Log audio file info
        file_hash = compute_file_hash(audio_file)
        print(f"Processing audio file: {audio_file}, Hash: {file_hash}")

        # Load audio to verify format
        audio, sr = librosa.load(audio_file, sr=16000)
        print(f"Audio shape: {audio.shape}, Sampling rate: {sr}, Duration: {len(audio)/sr:.2f}s, Mean: {np.mean(audio):.4f}, Std: {np.std(audio):.4f}")

        # Transcribe audio. Every status message transcribe_audio returns in
        # place of a transcription starts with one of these prefixes.
        transcription = transcribe_audio(audio_file, language)
        if transcription.startswith(("Error:", "Error transcribing", "Transcription empty.")):
            _speak(transcription)
            return transcription

        # Check for medication-related queries
        lowered = transcription.lower()
        if "medicine" in lowered or "treatment" in lowered:
            feedback = "Error: This tool does not provide medication or treatment advice. Please describe symptoms only (e.g., 'I have a fever')."
            feedback += f"\n\n**Debug Info**: Transcription = '{transcription}', File Hash = {file_hash}"
            feedback += "\n**Disclaimer**: This is not a diagnostic tool. Consult a healthcare provider for medical advice."
            _speak(feedback)
            return feedback

        # Analyze symptoms. Status messages must not be presented as a
        # "possible health condition".
        prediction, score = analyze_symptoms(transcription)
        if prediction.startswith(("Error:", "Error analyzing")) or prediction == "No valid transcription for analysis.":
            _speak(prediction)
            return prediction

        # Generate feedback
        if prediction == "No health condition predicted":
            feedback = "No significant health indicators detected."
        else:
            feedback = f"Possible health condition: {prediction} (confidence: {score:.4f}). Consult a doctor."

        feedback += f"\n\n**Debug Info**: Transcription = '{transcription}', Prediction = {prediction}, Confidence = {score:.4f}, File Hash = {file_hash}"
        feedback += "\n**Disclaimer**: This is not a diagnostic tool. Consult a healthcare provider for medical advice."

        # Text-to-speech for feedback
        _speak(feedback)
        return feedback
    except Exception as e:
        error_msg = f"Error processing audio: {str(e)}"
        _speak(error_msg)
        return error_msg
    finally:
        # Clean up temporary audio file on every exit path, not only on success.
        if moved_path is not None:
            try:
                os.remove(moved_path)
                print(f"Deleted temporary audio file: {moved_path}")
            except Exception as e:
                print(f"Failed to delete audio file: {str(e)}")
219
 
220
# Gradio interface
def create_gradio_interface():
    """Assemble the Blocks UI and return it without launching it.

    The layout is a language dropdown, an audio input, a query textbox, one
    shared output textbox and two buttons. "Analyze Voice" is wired to
    ``analyze_voice`` and "Submit Query" to ``handle_health_query``.
    """
    intro_markdown = """
            # Health Voice Analyzer
            Record or upload a voice sample describing symptoms in English, Spanish, Hindi, or Mandarin (e.g., 'I have a fever').
            Ask health questions in the text box below. Supports WAV, 16kHz audio.
            **Disclaimer**: This is not a diagnostic tool. Consult a healthcare provider for medical advice.
            """
    language_codes = ["en", "es", "hi", "zh"]

    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown(intro_markdown)

        # One row per widget, in display order.
        with gr.Row():
            language = gr.Dropdown(choices=language_codes, label="Select Language", value="en")
        with gr.Row():
            audio_input = gr.Audio(type="filepath", label="Record or Upload Voice")
        with gr.Row():
            query_input = gr.Textbox(label="Ask a Health Question (e.g., 'What are symptoms of asthma?')")
        with gr.Row():
            output = gr.Textbox(label="Health Assessment Feedback")
        with gr.Row():
            analyze_button = gr.Button("Analyze Voice")
            query_button = gr.Button("Submit Query")

        # Both handlers write into the same output textbox.
        analyze_button.click(fn=analyze_voice, inputs=[audio_input, language], outputs=output)
        query_button.click(fn=handle_health_query, inputs=[query_input, language], outputs=output)

    return demo
259
 
260
if __name__ == "__main__":
    # Script entry point: build the UI, then serve it on port 7860 with the
    # server name set to 0.0.0.0.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo = create_gradio_interface()
    demo.launch(**launch_options)