Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,218 +1,262 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
<body>
|
13 |
-
<div id="root"></div>
|
14 |
-
<script type="text/babel">
|
15 |
-
const { useState, useEffect } = React;
|
16 |
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
const [transcription, setTranscription] = useState('');
|
21 |
-
const [prediction, setPrediction] = useState('');
|
22 |
-
const [confidence, setConfidence] = useState(0);
|
23 |
-
const [error, setError] = useState('');
|
24 |
-
const [mediaRecorder, setMediaRecorder] = useState(null);
|
25 |
-
const [language, setLanguage] = useState('en');
|
26 |
-
const [query, setQuery] = useState('');
|
27 |
-
const [ttsResponse, setTtsResponse] = useState('');
|
28 |
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
mediaRecorder.stop();
|
60 |
-
setRecording(false);
|
61 |
-
}
|
62 |
-
};
|
63 |
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
return;
|
69 |
-
}
|
70 |
-
setError('');
|
71 |
-
setTranscription('');
|
72 |
-
setPrediction('');
|
73 |
-
setConfidence(0);
|
74 |
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
|
|
|
|
79 |
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
const data = await response.json();
|
88 |
-
if (data.error) {
|
89 |
-
setError(data.error);
|
90 |
-
speak(data.error);
|
91 |
-
} else {
|
92 |
-
setTranscription(data.transcription || 'No transcription available.');
|
93 |
-
setPrediction(data.prediction || 'No health condition predicted.');
|
94 |
-
setConfidence(data.confidence || 0);
|
95 |
-
const feedback = data.prediction === 'No health condition predicted'
|
96 |
-
? 'No significant health indicators detected.'
|
97 |
-
: `Possible health condition: ${data.prediction} (confidence: ${data.confidence.toFixed(4)}). Consult a doctor.`;
|
98 |
-
const fullFeedback = `${feedback}\n\n**Disclaimer**: This is not a diagnostic tool. Consult a healthcare provider for medical advice.`;
|
99 |
-
speak(fullFeedback);
|
100 |
-
}
|
101 |
-
} catch (err) {
|
102 |
-
setError('Error analyzing audio: ' + err.message);
|
103 |
-
speak('Error analyzing audio.');
|
104 |
-
}
|
105 |
-
};
|
106 |
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
118 |
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
<option value="zh">Mandarin</option>
|
142 |
-
</select>
|
143 |
-
</div>
|
144 |
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
164 |
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
182 |
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
<div className="mb-4">
|
187 |
-
<h2 className="text-xl font-semibold text-gray-700">Transcription</h2>
|
188 |
-
<p className="text-gray-600">{transcription}</p>
|
189 |
-
</div>
|
190 |
-
)}
|
191 |
-
{prediction && (
|
192 |
-
<div className="mb-4">
|
193 |
-
<h2 className="text-xl font-semibold text-gray-700">Health Assessment</h2>
|
194 |
-
<p className="text-gray-600">
|
195 |
-
{prediction === 'No health condition predicted'
|
196 |
-
? 'No significant health indicators detected.'
|
197 |
-
: `Possible health condition: ${prediction} (confidence: ${confidence.toFixed(4)}). Consult a doctor.`}
|
198 |
-
</p>
|
199 |
-
<p className="text-gray-500 text-sm mt-2">
|
200 |
-
**Disclaimer**: This is not a diagnostic tool. Consult a healthcare provider for medical advice.
|
201 |
-
</p>
|
202 |
-
</div>
|
203 |
-
)}
|
204 |
-
{ttsResponse && (
|
205 |
-
<div className="mb-4">
|
206 |
-
<h2 className="text-xl font-semibold text-gray-700">Query Response</h2>
|
207 |
-
<p className="text-gray-600">{ttsResponse}</p>
|
208 |
-
</div>
|
209 |
-
)}
|
210 |
-
</div>
|
211 |
-
</div>
|
212 |
-
);
|
213 |
-
};
|
214 |
-
|
215 |
-
ReactDOM.render(<HealthVoiceAnalyzer />, document.getElementById('root'));
|
216 |
-
</script>
|
217 |
-
</body>
|
218 |
-
</html>
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import librosa
|
3 |
+
import numpy as np
|
4 |
+
import os
|
5 |
+
import hashlib
|
6 |
+
from datetime import datetime
|
7 |
+
import soundfile as sf
|
8 |
+
import torch
|
9 |
+
from tenacity import retry, stop_after_attempt, wait_fixed
|
10 |
+
import pyttsx3
|
11 |
+
from transformers import pipeline
|
|
|
|
|
|
|
|
|
12 |
|
13 |
+
# Initialize text-to-speech engine
class _SilentTTSEngine:
    """No-op stand-in used when no pyttsx3 speech driver can be initialized.

    It exposes the engine methods this module calls (``say``, ``runAndWait``,
    ``setProperty``, ``getProperty``) so callers never have to special-case a
    missing engine.
    """

    def say(self, text, name=None):
        """Discard *text* instead of queueing it for speech."""

    def runAndWait(self):
        """Return immediately; nothing is ever queued."""

    def setProperty(self, name, value):
        """Ignore property changes."""

    def getProperty(self, name):
        """Return an empty list for ``'voices'`` and ``None`` otherwise."""
        return [] if name == 'voices' else None


try:
    tts_engine = pyttsx3.init()
    tts_engine.setProperty('rate', 150)
except Exception as e:
    # Speech output is optional. Without this guard a host that has no speech
    # driver or audio device would fail at import time and take the whole app
    # down with it.
    print(f"Text-to-speech unavailable, continuing without speech: {str(e)}")
    tts_engine = _SilentTTSEngine()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
+
# Initialize local models with retry logic
|
18 |
+
@retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
def load_whisper_model():
    """Build the speech-recognition pipeline backed by ``openai/whisper-tiny``.

    A failed load is logged and re-raised so the ``retry`` decorator
    (three attempts, two seconds apart) can try again.

    Returns:
        The object returned by ``pipeline("automatic-speech-recognition", ...)``.
    """
    pipeline_options = {
        "model": "openai/whisper-tiny",  # Multilingual model
        "device": -1,  # CPU; use device=0 for GPU if available
        "model_kwargs": {"use_safetensors": True},
    }
    try:
        asr_pipeline = pipeline("automatic-speech-recognition", **pipeline_options)
        print("Whisper model loaded successfully.")
    except Exception as exc:
        print(f"Failed to load Whisper model: {exc}")
        raise
    return asr_pipeline
|
32 |
|
33 |
+
@retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
def load_symptom_model():
    """Load the symptom text-classification pipeline, with a generic fallback.

    The primary model is ``abhirajeshbhai/symptom-2-disease-net``. If it
    cannot be loaded, ``distilbert-base-uncased`` is loaded instead and the
    module-level ``is_fallback_model`` flag is set to ``True`` so that
    downstream code can label its predictions accordingly.

    Returns:
        The object returned by ``pipeline("text-classification", ...)``.

    Raises:
        Exception: re-raised from the fallback load when both models fail,
            which lets the ``retry`` decorator attempt the whole load again.
    """
    global is_fallback_model
    try:
        model = pipeline(
            "text-classification",
            model="abhirajeshbhai/symptom-2-disease-net",
            device=-1,  # CPU
            model_kwargs={"use_safetensors": True},
        )
        print("Symptom-2-Disease model loaded successfully.")
        return model
    except Exception as e:
        print(f"Failed to load Symptom-2-Disease model: {str(e)}")
        # Fallback to a generic model
        try:
            model = pipeline(
                "text-classification",
                model="distilbert-base-uncased",
                device=-1,
            )
        except Exception as fallback_e:
            print(f"Fallback model failed: {str(fallback_e)}")
            raise
        # Record that the generic model is in use; previously the flag was
        # never set on this path, so the fallback warning could not appear.
        is_fallback_model = True
        print("Fallback to distilbert-base-uncased model.")
        return model
|
58 |
|
59 |
+
# Module-level model handles. Each stays None when its loader gives up, and
# the functions below check for that before using the model.
whisper, symptom_classifier, is_fallback_model = None, None, False

try:
    whisper = load_whisper_model()
except Exception as exc:
    print(f"Whisper model initialization failed after retries: {exc}")

try:
    symptom_classifier = load_symptom_model()
except Exception as exc:
    print(f"Symptom model initialization failed after retries: {exc}")
    # NOTE(review): the flag is raised here only when no classifier could be
    # loaded at all, a state in which analyze_symptoms returns early.
    symptom_classifier, is_fallback_model = None, True
|
74 |
|
75 |
+
def compute_file_hash(file_path):
    """Return the hexadecimal MD5 digest of the file at *file_path*.

    The file is read in 4096-byte pieces, so it is never held in memory
    in full. The digest is used to tell uploads apart in the logs.
    """
    digest = hashlib.md5()
    with open(file_path, "rb") as stream:
        while True:
            block = stream.read(4096)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
|
83 |
+
def transcribe_audio(audio_file, language="en"):
    """Transcribe audio using local Whisper model.

    Args:
        audio_file: Path of the audio file to transcribe.
        language: Language code forwarded to Whisper's ``generate_kwargs``.

    Returns:
        The stripped transcription text on success. Every failure is reported
        as a string instead of an exception: messages start with ``"Error"``,
        except an empty result, which starts with ``"Transcription empty"``.
    """
    # Local import keeps this function self-contained.
    import tempfile

    if not whisper:
        return "Error: Whisper model not loaded. Check logs for details or ensure sufficient compute resources."

    temp_wav = None
    try:
        # Load and validate audio
        audio, sr = librosa.load(audio_file, sr=16000)
        # NOTE(review): 1600 samples at 16 kHz is 0.1 s, while the message
        # asks for at least 1 second; confirm which of the two is intended.
        if len(audio) < 1600:  # Less than 0.1s
            return "Error: Audio too short. Please provide audio of at least 1 second."
        if np.max(np.abs(audio)) < 1e-4:  # Too quiet
            return "Error: Audio too quiet. Please provide clear audio describing symptoms."

        # Save as WAV for Whisper. mkstemp yields a unique path, so two
        # requests carrying the same upload name cannot overwrite each other.
        fd, temp_wav = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        sf.write(temp_wav, audio, sr)

        # Transcribe with beam search and language
        with torch.no_grad():
            result = whisper(temp_wav, generate_kwargs={"num_beams": 5, "language": language})
        transcription = result.get("text", "").strip()
        print(f"Transcription: {transcription}")

        if not transcription:
            return "Transcription empty. Please provide clear audio describing symptoms."
        # Check for repetitive transcription
        words = transcription.split()
        if len(words) > 5 and len(set(words)) < len(words) / 2:
            return "Error: Transcription appears repetitive. Please provide clear, non-repetitive audio describing symptoms."
        return transcription
    except Exception as e:
        return f"Error transcribing audio: {str(e)}"
    finally:
        # Clean up temp file on every path, including a failed transcription.
        if temp_wav is not None:
            try:
                os.remove(temp_wav)
            except OSError as cleanup_error:
                print(f"Failed to delete temporary WAV file: {str(cleanup_error)}")
|
120 |
|
121 |
+
def analyze_symptoms(text):
    """Classify a symptom description with the module-level text classifier.

    Args:
        text: Transcribed symptom description.

    Returns:
        A ``(message, score)`` tuple. ``message`` is the top label (suffixed
        with a fallback marker when ``is_fallback_model`` is set), or an
        explanatory string with a score of ``0.0`` when the classifier is
        missing, the text is unusable, no result comes back, or an exception
        is raised.
    """
    if not symptom_classifier:
        return "Error: Symptom-2-Disease model not loaded. Check logs for details or ensure sufficient compute resources.", 0.0
    try:
        if not text or "Error transcribing" in text:
            return "No valid transcription for analysis.", 0.0

        with torch.no_grad():
            outputs = symptom_classifier(text)

        # Anything other than a non-empty list counts as "nothing predicted".
        if not (outputs and isinstance(outputs, list)):
            return "No health condition predicted", 0.0

        top_result = outputs[0]
        label, confidence = top_result["label"], top_result["score"]
        if is_fallback_model:
            print("Warning: Using fallback model (distilbert-base-uncased). Results may be less accurate.")
            label = f"{label} (using fallback model)"
        print(f"Health Prediction: {label}, Score: {confidence:.4f}")
        return label, confidence
    except Exception as exc:
        return f"Error analyzing symptoms: {exc}", 0.0
|
141 |
|
142 |
+
def handle_health_query(query, language="en"):
    """Handle health-related queries with a simple response.

    Args:
        query: The question typed by the user.
        language: Value passed to the TTS engine's ``'voice'`` property.

    Returns:
        A prompt for a valid query when *query* is empty or only whitespace;
        otherwise the canned response text. The response is also spoken on a
        best-effort basis: a speech failure is logged and never prevents the
        text from being returned.
    """
    if not query or not query.strip():
        return "Please provide a valid health query."
    # Placeholder for Q&A logic (could integrate a model like BERT for Q&A)
    response = f"Response to query '{query}': For accurate health information, consult a healthcare provider."

    # Text-to-speech. The voice is selected in its own guard so that an
    # identifier the active driver rejects still leaves the default voice.
    try:
        tts_engine.setProperty('voice', language)
    except Exception as e:
        print(f"Could not select TTS voice '{language}': {str(e)}")
    try:
        tts_engine.say(response)
        tts_engine.runAndWait()
    except Exception as e:
        print(f"Text-to-speech failed: {str(e)}")
    return response
|
|
|
|
|
|
|
153 |
|
154 |
+
def _speak_feedback(message):
    """Speak *message* through the shared TTS engine without ever raising."""
    try:
        tts_engine.say(message)
        tts_engine.runAndWait()
    except Exception as e:
        # Speech is a convenience; the text result must still reach the caller.
        print(f"Text-to-speech failed: {str(e)}")


def _assess_audio(audio_file, language):
    """Transcribe *audio_file* and build the feedback text for it."""
    # Log audio file info
    file_hash = compute_file_hash(audio_file)
    print(f"Processing audio file: {audio_file}, Hash: {file_hash}")

    # Load audio to verify format
    audio, sr = librosa.load(audio_file, sr=16000)
    print(f"Audio shape: {audio.shape}, Sampling rate: {sr}, Duration: {len(audio)/sr:.2f}s, Mean: {np.mean(audio):.4f}, Std: {np.std(audio):.4f}")

    # Transcribe audio. transcribe_audio reports failures as strings that
    # begin with "Error" or "Transcription empty"; none of them may be passed
    # on to the classifier as if it were a symptom description.
    transcription = transcribe_audio(audio_file, language)
    if transcription.startswith(("Error", "Transcription empty")):
        return transcription

    # Check for medication-related queries
    lowered = transcription.lower()
    if "medicine" in lowered or "treatment" in lowered:
        feedback = "Error: This tool does not provide medication or treatment advice. Please describe symptoms only (e.g., 'I have a fever')."
        feedback += f"\n\n**Debug Info**: Transcription = '{transcription}', File Hash = {file_hash}"
        feedback += "\n**Disclaimer**: This is not a diagnostic tool. Consult a healthcare provider for medical advice."
        return feedback

    # Analyze symptoms. analyze_symptoms signals failure with messages that
    # begin with "Error" or "No valid transcription"; report them as they are
    # instead of presenting them as a possible condition.
    prediction, score = analyze_symptoms(transcription)
    if prediction.startswith(("Error", "No valid transcription")):
        return prediction

    # Generate feedback
    if prediction == "No health condition predicted":
        feedback = "No significant health indicators detected."
    else:
        feedback = f"Possible health condition: {prediction} (confidence: {score:.4f}). Consult a doctor."

    feedback += f"\n\n**Debug Info**: Transcription = '{transcription}', Prediction = {prediction}, Confidence = {score:.4f}, File Hash = {file_hash}"
    feedback += "\n**Disclaimer**: This is not a diagnostic tool. Consult a healthcare provider for medical advice."
    return feedback


def analyze_voice(audio_file, language="en"):
    """Analyze voice for health indicators.

    The upload is moved to a uniquely named file under ``/tmp/gradio``,
    transcribed, checked for medication questions, classified, and then
    deleted. The resulting text is also spoken on a best-effort basis.

    Args:
        audio_file: Path of the recorded or uploaded audio; may be empty when
            the user submits without audio.
        language: Language code forwarded to ``transcribe_audio``.

    Returns:
        The feedback text, or an error message. Failures are returned as
        strings and never raised.
    """
    # Local import keeps this function self-contained.
    import shutil

    if not audio_file:
        feedback = "Error processing audio: no audio file provided."
        _speak_feedback(feedback)
        return feedback

    work_path = None
    try:
        # Ensure unique file name. The target directory is created on demand
        # and shutil.move also works when source and target are on different
        # filesystems, where os.rename fails.
        unique_dir = "/tmp/gradio"
        os.makedirs(unique_dir, exist_ok=True)
        unique_path = os.path.join(
            unique_dir,
            f"{datetime.now().strftime('%Y%m%d%H%M%S%f')}_{os.path.basename(audio_file)}",
        )
        shutil.move(audio_file, unique_path)
        work_path = unique_path

        feedback = _assess_audio(work_path, language)
    except Exception as e:
        feedback = f"Error processing audio: {str(e)}"
    finally:
        # Clean up temporary audio file on every path, early returns included.
        if work_path is not None:
            try:
                os.remove(work_path)
                print(f"Deleted temporary audio file: {work_path}")
            except Exception as e:
                print(f"Failed to delete audio file: {str(e)}")

    # Text-to-speech for feedback
    _speak_feedback(feedback)
    return feedback
|
219 |
|
220 |
+
# Gradio interface
|
221 |
+
def create_gradio_interface():
    """Assemble the Gradio Blocks UI and wire its two buttons.

    The page has an intro text, a language dropdown, an audio input, a
    question box, one shared output box and two buttons: "Analyze Voice"
    runs ``analyze_voice`` and "Submit Query" runs ``handle_health_query``.

    Returns:
        The ``gr.Blocks`` object, not yet launched.
    """
    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown(
            """
            # Health Voice Analyzer
            Record or upload a voice sample describing symptoms in English, Spanish, Hindi, or Mandarin (e.g., 'I have a fever').
            Ask health questions in the text box below. Supports WAV, 16kHz audio.
            **Disclaimer**: This is not a diagnostic tool. Consult a healthcare provider for medical advice.
            """
        )

        with gr.Row():
            language_choice = gr.Dropdown(
                choices=["en", "es", "hi", "zh"],
                label="Select Language",
                value="en",
            )
        with gr.Row():
            voice_sample = gr.Audio(type="filepath", label="Record or Upload Voice")
        with gr.Row():
            question_box = gr.Textbox(label="Ask a Health Question (e.g., 'What are symptoms of asthma?')")
        with gr.Row():
            feedback_box = gr.Textbox(label="Health Assessment Feedback")
        with gr.Row():
            analyze_trigger = gr.Button("Analyze Voice")
            query_trigger = gr.Button("Submit Query")

        # Both actions write into the same feedback box.
        analyze_trigger.click(
            fn=analyze_voice,
            inputs=[voice_sample, language_choice],
            outputs=feedback_box,
        )
        query_trigger.click(
            fn=handle_health_query,
            inputs=[question_box, language_choice],
            outputs=feedback_box,
        )

    return demo
|
259 |
|
260 |
+
if __name__ == "__main__":
    # Build the UI only when run as a script, then serve it on all network
    # interfaces at port 7860.
    demo = create_gradio_interface()
    demo.launch(server_name="0.0.0.0", server_port=7860)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|