Blaiseboy committed
Commit c1fcc67 · verified · 1 Parent(s): 9f44287

Upload app.py

Files changed (1)
app.py +695 -0
app.py ADDED
@@ -0,0 +1,695 @@
# BioGPT Medical Chatbot with Gradio Interface - HUGGING FACE SPACES VERSION

import gradio as gr
import torch
import warnings
import numpy as np
import os
import re
import time
from datetime import datetime
from typing import List, Dict, Optional, Tuple
import json

# Install required packages if not already installed
try:
    from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
    from sentence_transformers import SentenceTransformer
    import faiss
except ImportError:
    print("Installing required packages...")
    import subprocess
    import sys

    packages = [
        "transformers>=4.21.0",
        "torch>=1.12.0",
        "sentence-transformers",
        "faiss-cpu",
        "accelerate",
        "bitsandbytes",
        "datasets",
        "numpy",
        "sacremoses",
        "scipy"
    ]

    for package in packages:
        try:
            subprocess.check_call([sys.executable, "-m", "pip", "install", package])
        except Exception as e:
            print(f"Failed to install {package}: {e}")

    from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
    from sentence_transformers import SentenceTransformer
    import faiss

# Suppress warnings
warnings.filterwarnings('ignore')

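# Note: on Hugging Face Spaces the runtime installs requirements.txt before
# app.py starts, so the pip fallback above is mainly a safety net for local
# runs; pinning these packages in requirements.txt is the more reliable path.
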
class GradioBioGPTChatbot:
    def __init__(self, use_gpu=False, use_8bit=False):  # Default to CPU for HF Spaces
        """Initialize BioGPT chatbot for Gradio deployment"""
        self.device = "cuda" if torch.cuda.is_available() and use_gpu else "cpu"
        self.use_8bit = use_8bit and torch.cuda.is_available()

        print(f"🔧 Initializing on device: {self.device}")

        # Initialize components with error handling
        try:
            self.setup_embeddings()
            self.setup_faiss_index()
            self.setup_biogpt()
        except Exception as e:
            print(f"❌ Initialization error: {e}")
            self.model = None
            self.tokenizer = None
            self.embedding_model = None
            # Ensure later lookups don't hit missing attributes
            self.use_embeddings = False
            self.faiss_ready = False

        # Conversation tracking
        self.conversation_history = []
        self.knowledge_chunks = []
        self.is_data_loaded = False

    def setup_embeddings(self):
        """Setup medical-optimized embeddings with error handling"""
        try:
            print("🔄 Loading embedding model...")
            self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
            self.embedding_dim = self.embedding_model.get_sentence_embedding_dimension()
            self.use_embeddings = True
            print("✅ Embeddings loaded successfully")
        except Exception as e:
            print(f"❌ Embeddings setup failed: {e}")
            self.embedding_model = None
            self.embedding_dim = 384  # all-MiniLM-L6-v2 dimension, kept so the FAISS index can still be built
            self.use_embeddings = False

    def setup_faiss_index(self):
        """Setup FAISS for vector search with error handling"""
        try:
            print("🔄 Setting up FAISS index...")
            self.faiss_index = faiss.IndexFlatIP(self.embedding_dim)
            self.faiss_ready = True
            print("✅ FAISS index ready")
        except Exception as e:
            print(f"❌ FAISS setup failed: {e}")
            self.faiss_index = None
            self.faiss_ready = False

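    # Similarity note: IndexFlatIP ranks by raw inner product, which equals
    # cosine similarity only when vectors are L2-normalized. The encode()
    # calls below pass normalize_embeddings=True for exactly that reason; an
    # equivalent explicit approach (sketch) would be:
    #
    #     faiss.normalize_L2(embeddings)  # in-place L2 normalization
    #     self.faiss_index.add(embeddings)
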
    def setup_biogpt(self):
        """Setup BioGPT model with optimizations and fallbacks"""
        print("🔄 Loading BioGPT model...")

        models_to_try = [
            "microsoft/biogpt",           # Base BioGPT (canonical Hub id is lowercase; smaller than BioGPT-Large)
            "microsoft/DialoGPT-medium",  # Fallback 1
            "microsoft/DialoGPT-small",   # Fallback 2
            "gpt2"                        # Final fallback
        ]

        for model_name in models_to_try:
            try:
                print(f"🔄 Trying model: {model_name}")

                self.tokenizer = AutoTokenizer.from_pretrained(model_name)
                if self.tokenizer.pad_token is None:
                    self.tokenizer.pad_token = self.tokenizer.eos_token

                if self.device == "cuda" and self.use_8bit:
                    quantization_config = BitsAndBytesConfig(load_in_8bit=True, llm_int8_threshold=6.0)
                else:
                    quantization_config = None

                self.model = AutoModelForCausalLM.from_pretrained(
                    model_name,
                    quantization_config=quantization_config,
                    torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
                    device_map="auto" if self.device == "cuda" else None,
                    trust_remote_code=True,
                    low_cpu_mem_usage=True
                )

                if self.device == "cpu":
                    self.model = self.model.to(self.device)

                print(f"✅ Successfully loaded: {model_name}")
                return  # Exit after first successful model load

            except Exception as e:
                print(f"❌ Failed to load {model_name}: {e}")
                continue

        print("❌ All models failed to load")
        self.model = None
        self.tokenizer = None

    def create_medical_chunks(self, text: str, chunk_size: int = 300) -> List[Dict]:
        """Create medically-optimized text chunks with smaller size for efficiency"""
        chunks = []

        # Split by medical sections first
        medical_sections = self.split_by_medical_sections(text)

        chunk_id = 0
        for section in medical_sections:
            if len(section.split()) > chunk_size:
                # Split large sections by sentences
                sentences = re.split(r'[.!?]+', section)
                current_chunk = ""

                for sentence in sentences:
                    sentence = sentence.strip()
                    if not sentence:
                        continue

                    if len(current_chunk.split()) + len(sentence.split()) < chunk_size:
                        current_chunk += sentence + ". "
                    else:
                        if current_chunk.strip():
                            chunks.append({
                                'id': chunk_id,
                                'text': current_chunk.strip(),
                                'medical_focus': self.identify_medical_focus(current_chunk)
                            })
                            chunk_id += 1
                        current_chunk = sentence + ". "

                if current_chunk.strip():
                    chunks.append({
                        'id': chunk_id,
                        'text': current_chunk.strip(),
                        'medical_focus': self.identify_medical_focus(current_chunk)
                    })
                    chunk_id += 1
            else:
                if section.strip():  # Don't add empty sections
                    chunks.append({
                        'id': chunk_id,
                        'text': section,
                        'medical_focus': self.identify_medical_focus(section)
                    })
                    chunk_id += 1

        return chunks

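    # Illustrative shape of one produced chunk (values are made up):
    #
    #     {'id': 0,
    #      'text': 'Fever in children is most often caused by ...',
    #      'medical_focus': 'pediatric_symptoms'}
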
    def split_by_medical_sections(self, text: str) -> List[str]:
        """Split text by medical sections"""
        section_patterns = [
            r'\n\s*(?:SYMPTOMS?|TREATMENT|DIAGNOSIS|CAUSES?|PREVENTION|MANAGEMENT).*?\n',
            r'\n\s*\d+\.\s+',
            r'\n\n+'
        ]

        sections = [text]
        for pattern in section_patterns:
            new_sections = []
            for section in sections:
                splits = re.split(pattern, section, flags=re.IGNORECASE)
                new_sections.extend([s.strip() for s in splits if len(s.strip()) > 50])  # Reduced minimum length
            sections = new_sections

        return sections

    def identify_medical_focus(self, text: str) -> str:
        """Identify the medical focus of a text chunk"""
        text_lower = text.lower()

        categories = {
            'pediatric_symptoms': ['fever', 'cough', 'rash', 'vomiting', 'diarrhea', 'child', 'baby', 'infant'],
            'treatments': ['treatment', 'therapy', 'medication', 'antibiotics', 'medicine'],
            'diagnosis': ['diagnosis', 'diagnostic', 'symptoms', 'signs', 'condition'],
            'emergency': ['emergency', 'urgent', 'serious', 'hospital', 'call doctor'],
            'prevention': ['prevention', 'vaccine', 'immunization', 'avoid', 'prevent']
        }

        for category, keywords in categories.items():
            if any(keyword in text_lower for keyword in keywords):
                return category

        return 'general_medical'

    def load_medical_data_from_file(self, file_path: str) -> Tuple[str, bool]:
        """Load medical data from uploaded file with better error handling"""
        if not file_path or not os.path.exists(file_path):
            return "❌ No file uploaded or file not found.", False

        try:
            print(f"🔄 Processing file: {file_path}")

            # Read file with encoding detection
            encodings_to_try = ['utf-8', 'utf-8-sig', 'latin-1', 'cp1252']
            text = None

            for encoding in encodings_to_try:
                try:
                    with open(file_path, 'r', encoding=encoding) as f:
                        text = f.read()
                    print(f"✅ File read successfully with {encoding} encoding")
                    break
                except UnicodeDecodeError:
                    continue

            if text is None:
                return "❌ Could not read file. Please ensure it's a valid text file.", False

            if len(text.strip()) < 100:
                return "❌ File appears to be too short or empty. Please upload a substantial medical text.", False

            # Create chunks
            print("🔄 Creating medical chunks...")
            chunks = self.create_medical_chunks(text)

            if not chunks:
                return "❌ No valid medical content found in the file.", False

            self.knowledge_chunks = chunks
            print(f"✅ Created {len(chunks)} chunks")

            # Generate embeddings if available
            if self.use_embeddings and self.embedding_model and self.faiss_ready:
                print("🔄 Generating embeddings...")
                success = self.generate_embeddings_and_index(chunks)
                if success:
                    self.is_data_loaded = True
                    return f"✅ Medical data loaded successfully! {len(chunks)} chunks processed with vector search.", True

            self.is_data_loaded = True
            return f"✅ Medical data loaded successfully! {len(chunks)} chunks processed (keyword search mode).", True

        except Exception as e:
            print(f"❌ Error processing file: {e}")
            return f"❌ Error loading file: {str(e)}", False

    def generate_embeddings_and_index(self, chunks: List[Dict]) -> bool:
        """Generate embeddings and add to FAISS index with error handling"""
        try:
            print("🔄 Generating embeddings...")
            texts = [chunk['text'] for chunk in chunks]

            # Process in batches to avoid memory issues
            batch_size = 32
            all_embeddings = []

            for i in range(0, len(texts), batch_size):
                batch_texts = texts[i:i + batch_size]
                # normalize_embeddings=True so inner-product search (IndexFlatIP)
                # behaves as cosine similarity
                batch_embeddings = self.embedding_model.encode(
                    batch_texts, show_progress_bar=False, normalize_embeddings=True
                )
                all_embeddings.append(batch_embeddings)

            embeddings = np.vstack(all_embeddings)
            self.faiss_index.add(embeddings.astype('float32'))
            print(f"✅ Added {len(embeddings)} embeddings to FAISS index")
            return True

        except Exception as e:
            print(f"❌ Embedding generation failed: {e}")
            return False

    def retrieve_medical_context(self, query: str, n_results: int = 3) -> List[str]:
        """Retrieve relevant medical context with fallback"""
        if not self.knowledge_chunks:
            return []

        if self.use_embeddings and self.embedding_model and self.faiss_ready:
            try:
                # Normalize the query the same way as the indexed chunks
                query_embedding = self.embedding_model.encode([query], normalize_embeddings=True)
                distances, indices = self.faiss_index.search(query_embedding.astype('float32'), n_results)
                context_chunks = []
                for i in indices[0]:
                    if i != -1 and i < len(self.knowledge_chunks):
                        context_chunks.append(self.knowledge_chunks[i]['text'])

                if context_chunks:
                    return context_chunks
            except Exception as e:
                print(f"❌ Embedding search failed: {e}")

        # Fallback to keyword search
        return self.keyword_search_medical(query, n_results)

    def keyword_search_medical(self, query: str, n_results: int) -> List[str]:
        """Medical-focused keyword search"""
        if not self.knowledge_chunks:
            return []

        query_words = set(query.lower().split())
        chunk_scores = []

        for chunk_info in self.knowledge_chunks:
            chunk_text = chunk_info['text']
            chunk_words = set(chunk_text.lower().split())

            word_overlap = len(query_words.intersection(chunk_words))
            base_score = word_overlap / len(query_words) if query_words else 0

            # Boost medical content
            medical_boost = 0
            if chunk_info.get('medical_focus') in ['pediatric_symptoms', 'treatments', 'diagnosis']:
                medical_boost = 0.3

            final_score = base_score + medical_boost

            if final_score > 0:
                chunk_scores.append((final_score, chunk_text))

        chunk_scores.sort(reverse=True)
        return [chunk for _, chunk in chunk_scores[:n_results]]

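    # Worked example of the score above: for the query "fever in children"
    # (3 distinct words), a chunk containing "fever" and "children" that is
    # tagged 'pediatric_symptoms' scores 2/3 + 0.3 ≈ 0.97.
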
    def generate_biogpt_response(self, context: str, query: str) -> str:
        """Generate medical response using loaded model"""
        if not self.model or not self.tokenizer:
            return "Medical AI model is not available. Using fallback response based on retrieved context."

        try:
            # Simplified prompt for better compatibility
            prompt = f"Context: {context[:600]}\n\nQuestion: {query}\n\nAnswer:"

            inputs = self.tokenizer(
                prompt,
                return_tensors="pt",
                truncation=True,
                max_length=512,  # Reduced for efficiency
                padding=True
            )

            if self.device == "cuda":
                inputs = {k: v.to(self.device) for k, v in inputs.items()}

            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=100,  # Reduced for efficiency
                    do_sample=True,
                    temperature=0.7,
                    top_p=0.9,
                    pad_token_id=self.tokenizer.eos_token_id,
                    repetition_penalty=1.1,
                    no_repeat_ngram_size=3
                )

            full_response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

            if "Answer:" in full_response:
                generated_response = full_response.split("Answer:")[-1].strip()
            else:
                generated_response = full_response[len(prompt):].strip()

            return self.clean_medical_response(generated_response) if generated_response else self.fallback_response(context, query)

        except Exception as e:
            print(f"❌ Generation failed: {e}")
            return self.fallback_response(context, query)

    def clean_medical_response(self, response: str) -> str:
        """Clean and format medical response"""
        if not response:
            return "I couldn't generate a specific response. Please consult a healthcare professional."

        # Remove incomplete sentences and clean up
        sentences = re.split(r'[.!?]+', response)
        clean_sentences = []

        for sentence in sentences:
            sentence = sentence.strip()
            if len(sentence) > 15 and not sentence.endswith(('and', 'or', 'but', 'however', 'the', 'a', 'an')):
                clean_sentences.append(sentence)
                if len(clean_sentences) >= 2:  # Limit to 2 sentences for clarity
                    break

        if clean_sentences:
            cleaned = '. '.join(clean_sentences) + '.'
        else:
            cleaned = response[:150] + '...' if len(response) > 150 else response

        return cleaned

    def fallback_response(self, context: str, query: str) -> str:
        """Fallback response when model generation fails"""
        if not context:
            return "I don't have specific information about this topic in my medical database. Please consult with a healthcare professional."

        # Extract most relevant sentences from context
        sentences = [s.strip() for s in context.split('.') if len(s.strip()) > 20]

        if sentences:
            # Return first 1-2 most relevant sentences
            response = sentences[0]
            if len(sentences) > 1 and len(response) < 100:
                response += '. ' + sentences[1]
            response += '.'
        else:
            response = context[:200] + '...' if len(context) > 200 else context

        return response

    def handle_conversational_interactions(self, query: str) -> Optional[str]:
        """Handle conversational interactions"""
        query_lower = query.lower().strip()

        # Greetings
        if query_lower in ['hello', 'hi', 'hey', 'good morning', 'good afternoon']:
            if not self.is_data_loaded:
                return "👋 Hello! I'm your medical AI assistant. Please upload your medical data file first, then ask me any health-related questions!"
            else:
                return "👋 Hello again! I'm ready to help. Ask me any medical question related to your uploaded data."

        # Thanks
        if any(thanks in query_lower for thanks in ['thank you', 'thanks', 'thx', 'appreciate']):
            return "🙏 You're welcome! Remember to always consult healthcare professionals for medical decisions. Feel free to ask more questions!"

        # Goodbyes
        if any(bye in query_lower for bye in ['bye', 'goodbye', 'see you', 'farewell']):
            return "👋 Goodbye! Take care and stay healthy! 🏥"

        # Help/About
        if any(help_word in query_lower for help_word in ['help', 'what can you do', 'how do you work']):
            return """🤖 **Medical AI Assistant**

I can help with:
• Medical information and conditions
• Symptom understanding
• Treatment information
• When to seek medical care

**How to use:**
1. Upload your medical data file
2. Ask specific medical questions
3. Get evidence-based information

⚠️ **Important:** I provide educational information only. Always consult healthcare professionals for medical advice."""

        return None

    def chat_interface(self, message: str, history: List[List[str]]) -> Tuple[str, List[List[str]]]:
        """Main chat interface for Gradio"""
        if not message.strip():
            return "", history

        # Check if data is loaded
        if not self.is_data_loaded:
            response = "⚠️ Please upload your medical data file first using the file upload above before asking questions."
            history.append([message, response])
            return "", history

        # Handle conversational interactions
        conversational_response = self.handle_conversational_interactions(message)
        if conversational_response:
            history.append([message, conversational_response])
            return "", history

        # Process medical query
        try:
            context = self.retrieve_medical_context(message)

            if not context:
                response = "I don't have specific information about this topic in my medical database. Please consult with a healthcare professional for personalized medical advice."
            else:
                main_context = '\n\n'.join(context)
                medical_response = self.generate_biogpt_response(main_context, message)
                response = f"🩺 **Medical Information:** {medical_response}\n\n⚠️ **Important:** This information is for educational purposes only. Always consult with qualified healthcare professionals for medical diagnosis, treatment, and personalized advice."

            # Add to conversation history
            self.conversation_history.append({
                'query': message,
                'response': response,
                'timestamp': datetime.now().isoformat()
            })

            history.append([message, response])
            return "", history

        except Exception as e:
            print(f"❌ Chat interface error: {e}")
            error_response = "I encountered an error processing your question. Please try again or consult a healthcare professional."
            history.append([message, error_response])
            return "", history

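    # History format sketch (Gradio "tuples" style): after one exchange the
    # list looks like [["What causes fever?", "🩺 **Medical Information:** ..."]].
    # Returning "" as the first output clears the input textbox.
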
# Initialize the chatbot with error handling
print("🚀 Initializing Medical AI Assistant...")
try:
    chatbot = GradioBioGPTChatbot(use_gpu=False, use_8bit=False)  # CPU-optimized for HF Spaces
    print("✅ Chatbot initialized successfully")
except Exception as e:
    print(f"❌ Chatbot initialization failed: {e}")
    chatbot = None

def upload_and_process_file(file):
    """Handle file upload and processing"""
    if file is None:
        return "❌ No file uploaded."

    if chatbot is None:
        return "❌ Chatbot not initialized properly. Please refresh the page."

    try:
        message, success = chatbot.load_medical_data_from_file(file)
        return message
    except Exception as e:
        return f"❌ Error processing file: {str(e)}"

# Create Gradio Interface
def create_gradio_interface():
    """Create and launch Gradio interface"""

    with gr.Blocks(
        title="🏥 Medical AI Assistant",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container {
            max-width: 1200px !important;
        }
        .chat-message {
            border-radius: 10px !important;
        }
        """
    ) as demo:

        gr.HTML("""
        <div style="text-align: center; padding: 20px;">
            <h1>🏥 Medical AI Assistant</h1>
            <p style="font-size: 18px; color: #666;">
                AI-powered medical information assistant
            </p>
            <p style="color: #888;">
                ⚠️ For educational purposes only. Always consult healthcare professionals for medical advice.
            </p>
        </div>
        """)

        with gr.Row():
            with gr.Column(scale=1):
                gr.HTML("<h3>📁 Upload Medical Data</h3>")
                file_upload = gr.File(
                    label="Upload Medical Text File (.txt)",
                    file_types=[".txt"],
                    type="filepath"
                )
                upload_status = gr.Textbox(
                    label="Upload Status",
                    value="📋 Please upload your medical data file to begin...",
                    interactive=False,
                    lines=3
                )

                gr.HTML("""
                <div style="margin-top: 20px; padding: 15px; background-color: #f0f8ff; border-radius: 10px;">
                    <h4>💡 How to Use:</h4>
                    <ol>
                        <li>Upload your medical text file (.txt format)</li>
                        <li>Wait for processing confirmation</li>
                        <li>Start asking medical questions!</li>
                    </ol>

                    <h4>📝 Example Questions:</h4>
                    <ul>
                        <li>"What causes fever in children?"</li>
                        <li>"How to treat a persistent cough?"</li>
                        <li>"When should I call the doctor?"</li>
                        <li>"Signs of dehydration in infants?"</li>
                    </ul>
                </div>
                """)

            with gr.Column(scale=2):
                gr.HTML("<h3>💬 Medical Consultation</h3>")
                chatbot_interface = gr.Chatbot(
                    label="Medical AI Chat",
                    height=500,
                    bubble_full_width=False
                )

                msg_input = gr.Textbox(
                    label="Your Medical Question",
                    placeholder="Ask me about health topics, symptoms, treatments, or when to seek care...",
                    lines=2
                )

                with gr.Row():
                    send_btn = gr.Button("🩺 Send Question", variant="primary")
                    clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary")

        # Event handlers with error handling
        def safe_upload_handler(file):
            try:
                return upload_and_process_file(file)
            except Exception as e:
                return f"❌ Upload error: {str(e)}"

        def safe_chat_handler(message, history):
            try:
                if chatbot is None:
                    return "", history + [[message, "❌ System error. Please refresh the page."]]
                return chatbot.chat_interface(message, history)
            except Exception as e:
                return "", history + [[message, f"❌ Error: {str(e)}"]]

        file_upload.change(
            fn=safe_upload_handler,
            inputs=[file_upload],
            outputs=[upload_status]
        )

        msg_input.submit(
            fn=safe_chat_handler,
            inputs=[msg_input, chatbot_interface],
            outputs=[msg_input, chatbot_interface]
        )

        send_btn.click(
            fn=safe_chat_handler,
            inputs=[msg_input, chatbot_interface],
            outputs=[msg_input, chatbot_interface]
        )

        clear_btn.click(
            fn=lambda: ([], ""),
            outputs=[chatbot_interface, msg_input]
        )

        gr.HTML("""
        <div style="text-align: center; margin-top: 30px; padding: 20px; background-color: #fff3cd; border-radius: 10px;">
            <h4>⚠️ Medical Disclaimer</h4>
            <p>This AI assistant provides educational medical information only and is not a substitute for professional medical advice, diagnosis, or treatment. Always seek the advice of qualified healthcare providers with questions about medical conditions.</p>
        </div>
        """)

    return demo

if __name__ == "__main__":
    # Create and launch the Gradio interface
    demo = create_gradio_interface()

    print("🌐 Launching Gradio interface...")
    print("📋 Upload your medical data file and start chatting!")

    # Launch with HF Spaces optimized settings
    # (show_tips/enable_queue were removed in Gradio 4.x; queue() replaces enable_queue)
    demo.queue(max_size=40)
    demo.launch(
        share=False,
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
        max_threads=40
    )