Spaces:
Sleeping
Sleeping
File size: 51,747 Bytes
ec9ad0b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 |
import os
import re
import torch
import warnings
import numpy as np
import gradio as gr
from transformers import (
AutoTokenizer,
AutoModelForCausalLM,
BitsAndBytesConfig
)
from sentence_transformers import SentenceTransformer
from typing import List, Dict, Optional
import time
from datetime import datetime
# Silence every warning category for the whole process; no category or module
# filter is given, so warnings raised by imported libraries are hidden as well.
warnings.filterwarnings('ignore')
class BioGPTMedicalChatbot:
def __init__(self):
    """Initialize the chatbot for Gradio deployment.

    Picks the torch device, loads the embedding and language models, then
    fills the knowledge base: the built-in default chunks first, followed by
    the optional pediatric pulmonology data file. Progress is printed to
    stdout.
    """
    print("π₯ Initializing BioGPT Pediatric Pulmonology Chatbot...")

    cuda_available = torch.cuda.is_available()
    self.device = "cuda" if cuda_available else "cpu"
    self.use_8bit = cuda_available
    print(f"π₯οΈ Using device: {self.device}")

    # Setup components
    self.setup_embeddings()
    self.setup_biogpt()

    # Knowledge base and conversation tracking
    self.knowledge_chunks = []
    self.conversation_history = []

    # Defaults go in first; the data-file loader appends to them.
    self.load_default_medical_knowledge()
    self.load_pediatric_pulmonology_data()

    # The check mark is written as an escape so the literal stays on one line
    # and survives re-encoding of this file.
    print("\u2705 BioGPT Pediatric Pulmonology Chatbot ready!")
def load_pediatric_pulmonology_data(self):
    """Load the first available pediatric pulmonology data file.

    Candidate file names are probed in the current working directory, in
    order. The first one that exists and that ``load_medical_data`` loads
    successfully ends the search.

    Returns:
        True if a file was loaded, False if none was found or every
        candidate failed to load.
    """
    pulmonology_files = [
        'Pediatric_cleaned.txt',
        'pediatric_cleaned.txt',
        'Pediatric_Cleaned.txt',
        'pediatric_pulmonology.txt',
        'pulmonology_data.txt',
    ]

    for filename in pulmonology_files:
        if not os.path.exists(filename):
            continue

        print(f"π Found pediatric pulmonology data: {filename}")
        try:
            success = self.load_medical_data(filename)
        except Exception as e:
            # Best effort: a broken file must not stop the remaining candidates.
            print(f"β οΈ Failed to load {filename}: {e}")
            continue

        if success:
            # Check mark written as an escape so the literal stays on one line.
            print(f"\u2705 Successfully loaded {filename} with pulmonology data!")
            print(f"π Total knowledge chunks: {len(self.knowledge_chunks)}")
            return True

    print("β οΈ No pediatric pulmonology data file found.")
    print(" Expected files: Pediatric_cleaned.txt")
    print(" Using default pediatric knowledge only.")
    print(f"π Current knowledge chunks: {len(self.knowledge_chunks)}")
    return False
def load_medical_data(self, file_path: str):
    """Read a UTF-8 text file, chunk it and append the chunks to the knowledge base.

    Args:
        file_path: Path of the text file to load.

    Returns:
        True when the file was read and its chunks were appended to
        ``self.knowledge_chunks``; False when the file is missing or cannot
        be read.
    """
    print(f"π Loading medical data from {file_path}...")
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            text = f.read()
    except FileNotFoundError:
        print(f"β File {file_path} not found!")
        return False
    except Exception as e:
        print(f"β Error reading file: {e}")
        return False
    print(f"π File loaded: {len(text):,} characters")

    # Create chunks optimized for medical content
    print("π Creating pediatric pulmonology chunks...")
    new_chunks = self.create_medical_chunks_from_text(text)
    print(f"π Created {len(new_chunks)} new medical chunks from file")

    # Append (never replace): ids continue after the chunks already present.
    for chunk_id, chunk in enumerate(new_chunks, start=len(self.knowledge_chunks)):
        chunk['id'] = chunk_id
        chunk['source'] = 'pediatric_pulmonology_file'
    self.knowledge_chunks.extend(new_chunks)

    # Check mark written as an escape so the literal stays on one line.
    print("\u2705 Medical data loaded successfully!")
    print(f"π Total knowledge chunks: {len(self.knowledge_chunks)}")
    return True
def create_medical_chunks_from_text(self, text: str, chunk_size: int = 400) -> List[Dict]:
    """Turn raw file text into chunk dicts for the knowledge base.

    The text is cleaned, split into sections, and every section longer than
    ``chunk_size`` words is further packed sentence by sentence into chunks
    that stay below ``chunk_size`` words.

    Args:
        text: Raw text as read from the data file.
        chunk_size: Word budget per chunk.

    Returns:
        A list of dicts with keys ``'text'`` and ``'medical_focus'``.
    """
    chunks = []

    def add_chunk(chunk_text: str) -> None:
        # Single place that builds a chunk dict; empty text is ignored.
        chunk_text = chunk_text.strip()
        if chunk_text:
            chunks.append({
                'text': chunk_text,
                'medical_focus': self.identify_medical_focus(chunk_text),
            })

    # NOTE(review): clean_medical_text collapses all whitespace to single
    # spaces, so the newline-based patterns in split_by_medical_sections
    # cannot match on its output - confirm whether that order is intended.
    cleaned_text = self.clean_medical_text(text)

    for section in self.split_by_medical_sections(cleaned_text):
        if len(section.split()) <= chunk_size:
            add_chunk(section)
            continue

        pending = []
        pending_words = 0
        # Split only where sentence punctuation is followed by whitespace, so
        # decimals such as "100.4" stay intact and the original terminator
        # ("!", "?") is kept.
        for sentence in re.split(r'(?<=[.!?])\s+', section):
            sentence = sentence.strip()
            if not sentence:
                continue
            if sentence[-1] not in '.!?':
                sentence += '.'

            sentence_words = len(sentence.split())
            if pending_words + sentence_words >= chunk_size:
                add_chunk(' '.join(pending))
                pending, pending_words = [], 0
            pending.append(sentence)
            pending_words += sentence_words

        add_chunk(' '.join(pending))

    return chunks
def clean_medical_text(self, text: str) -> str:
    """Strip markup and formatting artifacts from medical text.

    Removes comments and tags, replaces every character outside the allowed
    set (word characters, whitespace and ``.,;:!?()-'/"``) with a space, and
    collapses all whitespace runs to single spaces.

    Args:
        text: Raw text, possibly containing XML/HTML tags.

    Returns:
        The cleaned, single-spaced, stripped text.
    """
    # Markup comments first, so their content is not mistaken for text.
    text = re.sub(r'<!--.*?-->', '', text, flags=re.DOTALL)

    # Known document tags, tolerating whitespace inside the brackets.
    text = re.sub(
        r'</?\s*(?:FREETEXT|ABSTRACT|SECTION|TITLE)\s*>',
        '',
        text,
        flags=re.IGNORECASE,
    )

    # Any other tag must start with a letter right after "<" (optionally
    # preceded by "/", "!" or "?"). A bare comparison such as "<6 months ...
    # >38" is not a tag, so the words between the two signs are kept.
    text = re.sub(r'<[/!?]?[A-Za-z][^<>]*>', '', text)

    # Remove special characters that might be formatting artifacts.
    # NOTE(review): this also drops "<", ">", "%" and the degree sign, which
    # can carry clinical meaning - confirm that is acceptable.
    text = re.sub(r'[^\w\s.,;:!?()\-\'/"]', ' ', text)

    # One final pass normalizes newlines, tabs and repeated spaces.
    return re.sub(r'\s+', ' ', text).strip()
def split_by_medical_sections(self, text: str) -> List[str]:
    """Break text into sections at headers, numbered items and blank lines.

    The three separator patterns are applied one after another, each to the
    pieces produced by the previous one. After every pass, pieces of 100
    characters or fewer (after stripping) are discarded.

    Args:
        text: Text to split.

    Returns:
        The surviving stripped pieces, in document order.
    """
    separators = (
        # Lines starting with a medical section keyword
        r'\n\s*(?:SYMPTOMS?|TREATMENT|DIAGNOSIS|CAUSES?|PREVENTION|MANAGEMENT).*?\n',
        # Numbered sections
        r'\n\s*\d+\.\s+',
        # Paragraph breaks
        r'\n\n+',
    )

    pieces = [text]
    for separator in separators:
        refined = []
        for piece in pieces:
            for fragment in re.split(separator, piece, flags=re.IGNORECASE):
                fragment = fragment.strip()
                if len(fragment) > 100:
                    refined.append(fragment)
        pieces = refined
    return pieces
def identify_medical_focus(self, text: str) -> str:
    """Classify a text chunk by the first category whose keyword it contains.

    Matching is a case-insensitive substring test. Categories are tried in
    the order listed below, so pulmonology wins over every other category.

    Args:
        text: Chunk text to classify.

    Returns:
        The matching category name, or ``'general_medical'`` if no keyword
        occurs in the text.
    """
    lowered = text.lower()

    # Ordered (category, keywords) pairs; order defines priority.
    focus_keywords = (
        ('pediatric_pulmonology', (
            'asthma', 'pneumonia', 'bronchiolitis', 'croup', 'respiratory', 'lung', 'airway',
            'breathing', 'cough', 'wheeze', 'stridor', 'pneumothorax', 'pleural', 'ventilator',
            'oxygen', 'respiratory distress', 'bronchitis', 'pulmonary', 'chest', 'inhaler',
        )),
        ('pediatric_symptoms', ('fever', 'rash', 'vomiting', 'diarrhea', 'pain')),
        ('treatments', ('treatment', 'therapy', 'medication', 'antibiotics', 'steroid')),
        ('diagnosis', ('diagnosis', 'diagnostic', 'symptoms', 'signs', 'test')),
        ('emergency', ('emergency', 'urgent', 'serious', 'hospital', 'icu')),
        ('prevention', ('prevention', 'vaccine', 'immunization', 'avoid')),
    )

    for focus, terms in focus_keywords:
        for term in terms:
            if term in lowered:
                return focus
    return 'general_medical'
def setup_embeddings(self):
    """Load the sentence-embedding model, degrading gracefully on failure.

    On success ``self.embedding_model`` holds the ``all-MiniLM-L6-v2``
    SentenceTransformer and ``self.use_embeddings`` is True. On any error
    the model is set to None and ``self.use_embeddings`` to False, so the
    caller can continue without embeddings.
    """
    try:
        print("π§ Loading embeddings...")
        self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
        self.use_embeddings = True
        # Check mark written as an escape so the literal stays on one line.
        print("\u2705 Embeddings loaded successfully")
    except Exception as e:
        # Deliberate best effort: report the failure and run without embeddings.
        print(f"β οΈ Embeddings failed: {e}")
        self.embedding_model = None
        self.use_embeddings = False
def setup_biogpt(self):
    """Load the first causal language model that can be initialized.

    Candidates are tried in the order listed below; despite the method
    name, BioGPT is the last candidate. On success ``self.tokenizer``,
    ``self.model`` and ``self.model_name`` are set. If every candidate
    fails, model and tokenizer are set to None and ``self.model_name`` to
    ``"Rule-based fallback"``.
    """
    print("π§ Loading BioGPT model...")

    # Try more stable models first
    models_to_try = [
        "microsoft/DialoGPT-medium",  # Most stable conversational model
        "microsoft/DialoGPT-small",   # Smaller backup
        "gpt2-medium",                # General GPT-2 backup
        "microsoft/BioGPT",           # BioGPT if available
    ]
    on_gpu = self.device == "cuda"

    for model_name in models_to_try:
        try:
            print(f" Trying {model_name}...")

            # Load tokenizer first; reuse EOS as padding when none is defined.
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token

            # Load model with conservative settings.
            # NOTE(review): trust_remote_code=True lets the model repository
            # run its own Python code - confirm this is required here.
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                torch_dtype=torch.float16 if on_gpu else torch.float32,
                device_map="auto" if on_gpu else None,
                trust_remote_code=True,
                low_cpu_mem_usage=True,
            )
            # NOTE(review): the model was already placed via device_map="auto"
            # on GPU; this extra move looks redundant - confirm before removing.
            if on_gpu:
                model = model.to(self.device)
        except Exception as e:
            print(f"β Failed to load {model_name}: {e}")
            continue

        # Publish only a fully loaded tokenizer/model pair.
        self.tokenizer = tokenizer
        self.model = model
        self.model_name = model_name
        # Check mark written as an escape so the literal stays on one line.
        print(f"\u2705 Successfully loaded {model_name}!")
        return

    # If all models fail - use rule-based fallback
    print("β All models failed to load - using rule-based responses")
    self.model = None
    self.tokenizer = None
    self.model_name = "Rule-based fallback"
def load_default_medical_knowledge(self):
    """Replace the knowledge base with the built-in default chunks.

    Sets ``self.knowledge_chunks`` to a list of dicts with keys ``'id'``
    (0-based position), ``'text'``, ``'medical_focus'`` and ``'source'``
    (always ``'default_knowledge'``).
    """
    # (medical_focus, text) pairs; ids are assigned from the position below.
    # Degree signs are written as \u00b0 escapes so temperatures display
    # correctly regardless of how this file is re-encoded.
    default_entries = [
        (
            'pediatric_symptoms',
            "Fever in children is commonly caused by viral infections (most common), bacterial infections, immunizations, teething in infants, or overdressing. Normal body temperature ranges from 97\u00b0F to 100.4\u00b0F (36.1\u00b0C to 38\u00b0C). A fever is generally considered when oral temperature exceeds 100.4\u00b0F (38\u00b0C). Most fevers are not dangerous and help the body fight infection. Treatment includes rest, fluids, and fever reducers like acetaminophen or ibuprofen for comfort.",
        ),
        (
            'pediatric_symptoms',
            "Dehydration in infants and children can occur rapidly, especially during illness with vomiting or diarrhea. Warning signs include: dry mouth and tongue, decreased urination, lethargy or irritability, sunken eyes, and in infants under 12 months, sunken fontanelle (soft spot). Mild dehydration can be treated with oral rehydration solutions or clear fluids. Severe dehydration requires immediate medical attention.",
        ),
        (
            'pediatric_symptoms',
            "Common cold symptoms in children include runny or stuffy nose, cough, low-grade fever, sneezing, and general fussiness. Most colds are viral and resolve within 7-10 days without specific treatment. Treatment focuses on comfort measures: rest, adequate fluids, humidified air, and saline nasal drops for congestion. Antibiotics are not effective against viral colds.",
        ),
        (
            'emergency',
            "Emergency warning signs in children requiring immediate medical attention include: severe difficulty breathing, persistent high fever over 104\u00b0F (40\u00b0C), signs of severe dehydration, persistent vomiting preventing fluid intake, severe headache with neck stiffness, altered consciousness or extreme lethargy, severe abdominal pain, or any concerning change in behavior. When in doubt, seek medical care.",
        ),
        (
            'prevention',
            "Childhood vaccination schedules protect against serious diseases including measles, mumps, rubella, polio, hepatitis B, Haemophilus influenzae, pneumococcal disease, and others. Vaccines are rigorously tested for safety and effectiveness. Side effects are typically mild, such as low-grade fever or soreness at injection site. Following recommended vaccination schedules protects individual children and the community.",
        ),
        (
            'pediatric_symptoms',
            "Persistent cough in children can be caused by viral upper respiratory infections, asthma, allergies, bacterial infections, or irritants. Most coughs from colds resolve within 2-3 weeks. Seek medical evaluation for coughs lasting more than 3 weeks, coughs with blood, difficulty breathing, or coughs severely interfering with sleep. Treatment depends on the underlying cause.",
        ),
        (
            'pediatric_pulmonology',
            "Asthma in children is a chronic respiratory condition affecting the airways, causing them to become inflamed, narrow, and produce excess mucus. Common triggers include viral infections, allergens (dust mites, pollen, pet dander), irritants (smoke, strong odors), cold air, and exercise. Symptoms include wheezing, cough (especially at night), shortness of breath, and chest tightness. Management includes avoiding triggers, using prescribed inhalers, and having an asthma action plan.",
        ),
        (
            'pediatric_pulmonology',
            "Bronchiolitis is a common respiratory infection in infants and young children, typically caused by respiratory syncytial virus (RSV). It affects the small airways (bronchioles) in the lungs, causing inflammation and mucus buildup. Symptoms include runny nose, cough, low-grade fever, and difficulty breathing. Most cases are mild and resolve with supportive care, but severe cases may require hospitalization for oxygen support.",
        ),
        (
            'pediatric_pulmonology',
            "Pneumonia in children is an infection that inflames air sacs in one or both lungs, which may fill with fluid. It can be caused by bacteria, viruses, or fungi. Symptoms include cough with phlegm, fever, chills, and difficulty breathing. Bacterial pneumonia often requires antibiotic treatment, while viral pneumonia typically resolves with supportive care. Seek medical attention for persistent fever, difficulty breathing, or worsening symptoms.",
        ),
        (
            'pediatric_pulmonology',
            "Croup is a respiratory condition that causes swelling around the vocal cords. It's most common in children between 6 months and 6 years old. Symptoms include a distinctive barking cough, stridor (harsh sound when breathing in), hoarse voice, and difficulty breathing. Most cases are mild and can be treated at home with humidified air and staying calm. Severe cases with significant breathing difficulty require immediate medical attention.",
        ),
    ]

    default_knowledge = [
        {
            'id': chunk_id,
            'text': chunk_text,
            'medical_focus': focus,
            'source': 'default_knowledge',
        }
        for chunk_id, (focus, chunk_text) in enumerate(default_entries)
    ]

    self.knowledge_chunks = default_knowledge
    print(f"π Loaded {len(default_knowledge)} default medical knowledge chunks")
def retrieve_medical_context(self, query: str, n_results: int = 3) -> List[str]:
    """Return the texts of the best-scoring knowledge chunks for a query.

    A chunk's score is the fraction of search terms that occur in its text
    (substring match) plus a fixed boost that depends on its
    ``medical_focus`` / ``source``. Search terms are the query words,
    widened by every synonym group that has a member occurring in the query.

    Args:
        query: User question.
        n_results: Maximum number of chunk texts to return.

    Returns:
        Up to ``n_results`` chunk texts, best first. If no chunk scores above
        zero, the first two chunks are returned; an empty knowledge base
        yields an empty list.
    """
    if not self.knowledge_chunks:
        return []

    lowered_query = query.lower()

    # Synonym groups used to widen the query, with pulmonology emphasis
    medical_keywords = {
        'pulmonology': ['asthma', 'pneumonia', 'bronchiolitis', 'croup', 'respiratory', 'lung', 'airway', 'breathing', 'cough', 'wheeze', 'stridor'],
        'fever': ['fever', 'temperature', 'hot', 'warm', 'burning'],
        'stomach': ['stomach', 'abdominal', 'belly', 'tummy', 'pain', 'ache'],
        'rash': ['rash', 'skin', 'red', 'spots', 'bumps', 'itchy'],
        'vomiting': ['vomit', 'vomiting', 'throw up', 'sick', 'nausea'],
        'diarrhea': ['diarrhea', 'loose', 'stool', 'bowel', 'poop'],
        'dehydration': ['dehydration', 'dehydrated', 'fluids', 'water', 'thirsty'],
        'emergency': ['emergency', 'urgent', 'serious', 'severe', 'hospital', 'doctor']
    }

    search_terms = set(lowered_query.split())
    for related_terms in medical_keywords.values():
        if any(term in lowered_query for term in related_terms):
            search_terms.update(related_terms)

    def focus_boost(focus: str, origin: str) -> float:
        # Fixed bonus per chunk category; the first matching rule wins.
        if focus == 'pediatric_pulmonology':
            return 0.8
        if origin == 'pediatric_pulmonology_file':
            return 0.7
        if focus == 'emergency':
            return 0.4
        if focus in ['treatments', 'diagnosis']:
            return 0.3
        if focus == 'pediatric_symptoms':
            return 0.5
        return 0

    scored = []
    for entry in self.knowledge_chunks:
        body = entry['text'].lower()
        hits = sum(1 for term in search_terms if term in body)
        relevance = hits / len(search_terms) if search_terms else 0
        total = relevance + focus_boost(entry.get('medical_focus', ''), entry.get('source', ''))
        if total > 0:
            scored.append((total, entry['text']))

    # Highest score first; ties fall back to comparing the texts.
    scored.sort(reverse=True)
    best = [chunk_text for _, chunk_text in scored[:n_results]]

    # Nothing scored above zero: hand back the first chunks as a default.
    if not best:
        best = [entry['text'] for entry in self.knowledge_chunks[:2]]
    return best
def generate_biogpt_response(self, context: str, query: str) -> str:
    """Answer a query, preferring the rule-based answer over the language model.

    The rule-based answer is returned directly unless it is the generic
    "consult a provider" fallback. Only then is the language model tried
    (if one is loaded); its output is used when it passes the length and
    relevance checks, otherwise the rule-based answer is returned.

    Args:
        context: Retrieved knowledge text; only its first 300 characters
            are placed in the model prompt.
        query: User question.

    Returns:
        The chosen answer text.
    """
    rule_based_response = self.generate_rule_based_response(context, query)

    # The generic fallback is recognized by its wording. Both spellings are
    # checked because the fallback text reads "qualified pediatric healthcare
    # provider", which the shorter phrase alone never matches.
    generic_markers = (
        "consult with a qualified healthcare provider",
        "consult with a qualified pediatric healthcare provider",
    )
    is_generic = any(marker in rule_based_response for marker in generic_markers)

    if len(rule_based_response) > 50 and not is_generic:
        # Check mark written as an escape so the literal stays on one line.
        print("\u2705 Using rule-based response (reliable)")
        return rule_based_response

    # Only try AI model if rule-based didn't work and model is available
    if self.model is not None and self.tokenizer is not None:
        try:
            print("π€ Trying AI model generation...")

            # Create medical prompt
            prompt = f"Medical Question: {query}\n\nMedical Information: {context[:300]}\n\nAnswer:"

            # Tokenize with conservative limits
            inputs = self.tokenizer(
                prompt,
                return_tensors="pt",
                truncation=True,
                max_length=400,
                padding=True,
            )
            if self.device == "cuda":
                inputs = {k: v.to(self.device) for k, v in inputs.items()}

            # Generate with conservative sampling settings. early_stopping is
            # not passed: it only applies to beam search, which is not used.
            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=80,
                    do_sample=True,
                    temperature=0.2,  # Very low temperature
                    top_p=0.7,
                    top_k=20,
                    pad_token_id=self.tokenizer.eos_token_id,
                    eos_token_id=self.tokenizer.eos_token_id,
                    repetition_penalty=1.2,
                )

            full_response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

            # Keep only the text after the prompt's "Answer:" marker.
            if "Answer:" in full_response:
                generated_response = full_response.split("Answer:")[-1].strip()
            else:
                generated_response = full_response[len(prompt):].strip()

            cleaned_response = self.simple_clean_response(generated_response)

            # Validate AI response quality
            if (len(cleaned_response.strip()) > 30 and
                    self.validate_medical_response(cleaned_response, query)):
                print("\u2705 Using AI model response")
                return cleaned_response

            print("β οΈ AI response quality check failed, using rule-based")
            return rule_based_response
        except Exception as e:
            # Best effort: any generation failure falls back to the rule-based answer.
            print(f"β οΈ Model generation failed: {e}")

    # Final fallback
    print("π Using rule-based fallback")
    return rule_based_response
def simple_clean_response(self, response: str) -> str:
    """Lightly clean generated text and keep at most two sentences.

    Removes tags and block-drawing characters, normalizes whitespace, then
    keeps the first two sentences that are longer than 5 characters and
    have at least two words. The kept sentences are joined with ". " and
    the result ends with a period.

    Args:
        response: Raw generated text.

    Returns:
        The cleaned text; if no sentence qualifies, the first 200 characters
        of the normalized text (or an empty string).
    """
    # Remove obvious artifacts: tags and Unicode block elements
    # (U+2582-U+259F). The range is written with escapes so that only those
    # characters are stripped, never ordinary letters such as Greek beta.
    response = re.sub(r'<[^>]*>', '', response)
    response = re.sub(r'[\u2582-\u259f]', '', response)

    # Clean whitespace
    response = re.sub(r'\s+', ' ', response).strip()

    # Split only where sentence punctuation is followed by whitespace, so
    # decimals such as "100.4" are not cut in half.
    good_sentences = []
    for fragment in re.split(r'(?<=[.!?])\s+', response):
        sentence = fragment.strip().rstrip('.!?').strip()
        if len(sentence) > 5 and len(sentence.split()) >= 2:
            good_sentences.append(sentence)
            if len(good_sentences) >= 2:  # Max 2 sentences
                break

    if good_sentences:
        result = '. '.join(good_sentences)
        if not result.endswith('.'):
            result += '.'
        return result

    return response[:200] if response else ""
def validate_medical_response(self, response: str, query: str) -> bool:
    """Check whether a generated answer is on topic for the query.

    If the query names any of the tracked conditions, the answer must
    mention at least one of those same conditions. Otherwise the answer
    only needs to contain one general medical keyword. All matching is
    case-insensitive substring matching.

    Args:
        response: Generated answer text.
        query: User question.

    Returns:
        True if the answer passes the check, False otherwise.
    """
    answer = response.lower()
    question = query.lower()

    tracked_conditions = ('asthma', 'pneumonia', 'cough', 'fever', 'breathing', 'bronchiolitis', 'croup')
    asked_about = [condition for condition in tracked_conditions if condition in question]

    # A condition-specific question must be answered about that condition.
    if asked_about:
        return any(condition in answer for condition in asked_about)

    # For general queries, check for reasonable medical content
    general_terms = ('symptoms', 'treatment', 'children', 'medical', 'doctor', 'healthcare')
    return any(term in answer for term in general_terms)
def generate_rule_based_response(self, context: str, query: str) -> str:
    """Build an answer from retrieved context or from canned responses.

    Priority order:
      1. Up to two context sentences that share words with the query, plus
         a closing advice sentence chosen from the query wording.
      2. A canned response for the first condition keyword found in the query.
      3. A general childhood-illness response.
      4. A generic "consult a provider" response.

    Args:
        context: Retrieved knowledge text; may be empty.
        query: User question.

    Returns:
        The answer text.
    """
    query_lower = query.lower()

    # Priority 1: Use context directly if it's highly relevant
    if context:
        # Words are compared without punctuation so "fever?" matches "fever".
        query_words = set(re.findall(r'\w+', query_lower))

        relevant_sentences = []
        # Split on a period only when it ends a sentence (followed by
        # whitespace or end of text) so decimals such as "100.4" survive.
        for sentence in re.split(r'\.(?:\s+|$)', context):
            sentence = sentence.strip()
            if len(sentence) <= 15:
                continue
            overlap = len(query_words.intersection(re.findall(r'\w+', sentence.lower())))
            if overlap > 0:
                relevant_sentences.append((overlap, sentence))

        # Stable sort: equally relevant sentences keep their context order.
        relevant_sentences.sort(reverse=True, key=lambda item: item[0])

        if relevant_sentences:
            # Use top 2 most relevant sentences
            response = '. '.join(sentence for _, sentence in relevant_sentences[:2])
            if not response.endswith('.'):
                response += '.'

            # Add appropriate medical advice
            if any(word in query_lower for word in ['emergency', 'urgent', 'severe', 'serious']):
                response += " If symptoms are severe or concerning, seek immediate medical attention."
            elif any(word in query_lower for word in ['treatment', 'medicine', 'medication']):
                response += " Always follow your healthcare provider's treatment recommendations."
            else:
                response += " Consult with a healthcare provider for personalized medical advice."
            return response

    # Priority 2: Specific keyword-based responses (enhanced).
    # Degree signs are written as \u00b0 escapes so temperatures display
    # correctly regardless of how this file is re-encoded.
    keyword_responses = {
        'asthma': "Asthma in children is a chronic respiratory condition that causes the airways to become inflamed, narrow, and produce excess mucus. Common symptoms include wheezing, cough (especially at night), shortness of breath, and chest tightness. Triggers can include viral infections, allergens like dust mites and pollen, irritants such as smoke, cold air, and exercise. Management typically involves avoiding known triggers, using prescribed medications like rescue inhalers for acute symptoms and controller medications for long-term management, and having an asthma action plan. Regular follow-up with healthcare providers is essential for optimal asthma control.",
        'pneumonia': "Pneumonia is a lung infection that causes inflammation in the air sacs of one or both lungs, which may fill with fluid or pus. In children, it can be caused by bacteria, viruses, or other organisms. Common symptoms include cough with phlegm, fever, chills, difficulty breathing, chest pain, and fatigue. Bacterial pneumonia typically requires antibiotic treatment, while viral pneumonia is managed with supportive care including rest, fluids, and fever management. Seek medical evaluation promptly for persistent fever, difficulty breathing, or worsening symptoms.",
        'bronchiolitis': "Bronchiolitis is a common respiratory infection in infants and young children under 2 years old, usually caused by viruses like respiratory syncytial virus (RSV). It affects the small airways (bronchioles) in the lungs, causing inflammation and mucus buildup. Symptoms typically start like a cold with runny nose and cough, then may progress to difficulty breathing, wheezing, and feeding problems. Most cases are mild and resolve with supportive care at home, but some children may need hospitalization for breathing support and monitoring.",
        'croup': "Croup is a respiratory condition that causes swelling around the vocal cords and windpipe. It's most common in children between 6 months and 6 years old. The hallmark symptom is a distinctive barking cough that sounds like a seal, along with stridor (harsh sound when breathing in), hoarse voice, and sometimes difficulty breathing. Most cases are mild and can be managed at home with humidified air and keeping the child calm. However, seek immediate medical care if breathing becomes severely difficult or if stridor is present at rest.",
        'cough': "Cough in children can have many different causes including viral upper respiratory infections (most common), asthma, allergies, bacterial infections, or environmental irritants. Most coughs from common colds resolve within 2-3 weeks without specific treatment. However, seek medical evaluation for coughs that persist longer than 3 weeks, are accompanied by high fever, produce blood, cause significant difficulty breathing, or severely interfere with sleep and daily activities. Treatment depends on identifying and addressing the underlying cause.",
        'fever': "Fever in children is usually a sign that the body is fighting an infection, most commonly viral. Normal body temperature ranges from 97\u00b0F to 100.4\u00b0F (36.1\u00b0C to 38\u00b0C), with fever generally defined as a temperature above 100.4\u00b0F (38\u00b0C). Most fevers are not dangerous and actually help the immune system fight infection. Treatment focuses on comfort measures including adequate rest, increased fluid intake, and appropriate fever reducers like acetaminophen or ibuprofen if needed. Seek medical care for very high fevers over 104\u00b0F (40\u00b0C), fevers in infants under 3 months old, or if the child appears very ill.",
        'breathing': "Breathing difficulties in children can range from mild to severe and have various causes including asthma, respiratory infections, allergies, or airway obstruction. Signs of respiratory distress include rapid breathing, difficulty speaking in full sentences, use of extra muscles to breathe (retractions), wheezing, or blue coloring around the lips or fingernails. Mild breathing difficulties may be managed with prescribed medications like inhalers, but severe breathing problems require immediate medical attention. Always seek emergency care if a child cannot breathe comfortably or appears to be struggling significantly.",
        'wheeze': "Wheezing is a high-pitched whistling sound that occurs when breathing, usually more noticeable when exhaling. In children, it's commonly caused by asthma, but can also result from respiratory infections, allergies, or airway inflammation. Wheezing indicates narrowed or partially blocked airways. Treatment depends on the underlying cause and may include bronchodilator medications (rescue inhalers), anti-inflammatory medications, or treatment of underlying infections. Persistent or severe wheezing should be evaluated by a healthcare provider.",
    }

    # Substring match already covers partial words (e.g. "coughing").
    for keyword, response in keyword_responses.items():
        if keyword in query_lower:
            return response

    # Priority 3: General topic-based responses
    if any(word in query_lower for word in ['child', 'children', 'pediatric', 'baby', 'infant']):
        if any(word in query_lower for word in ['sick', 'illness', 'disease', 'condition']):
            return "When children become ill, it's important to monitor their symptoms carefully and provide appropriate care. Common childhood illnesses include respiratory infections, gastrointestinal issues, and fever-related conditions. Most childhood illnesses are mild and resolve with supportive care, but some may require medical evaluation. Always consult with a pediatric healthcare provider when you're concerned about your child's health, especially for persistent symptoms, high fevers, or any signs of serious illness."

    # Priority 4: Generic medical response
    return "For specific medical concerns about your child's health, it's important to consult with a qualified pediatric healthcare provider. They can properly evaluate your child's individual situation, provide accurate diagnosis, and recommend appropriate treatment based on their specific symptoms, medical history, and current condition. If you're dealing with urgent symptoms or have immediate concerns about your child's breathing, fever, or overall condition, don't hesitate to seek prompt medical attention."
def handle_conversational_interactions(self, query: str) -> Optional[str]:
    """Return a canned reply for pure small talk, or ``None`` otherwise.

    The query is lower-cased, runs of whitespace are collapsed to a single
    space, and surrounding spaces/punctuation (``! ? . , ; :``) are removed
    before it is compared against the fixed greeting, thanks and help
    phrases. Only a whole-message match counts, so a message such as
    "hello, my child has a cough" is still returned as ``None``.

    Args:
        query: Raw user message.

    Returns:
        The canned greeting, thanks or help text when the whole message is
        one of the known phrases; ``None`` for empty input or anything else.
    """
    if not query:
        return None

    # Without this normalisation "Hello!" or "thanks." would not equal any
    # phrase below and would fall through to the caller as non-conversational.
    normalized = ' '.join(query.lower().split()).strip(' !?.,;:')

    # Greeting patterns
    exact_greetings = (
        'hello', 'hi', 'hey', 'good morning', 'good afternoon',
        'good evening', 'how are you', 'how are you doing',
    )
    if normalized in exact_greetings:
        return "π Hello! I'm BioGPT, your AI medical assistant specialized in pediatric pulmonology. I provide evidence-based medical information about children's respiratory health. What can I help you with today?"

    # Thanks patterns
    thanks_only = ('thank you', 'thanks', 'thank you so much', 'thanks a lot')
    if normalized in thanks_only:
        return "π You're welcome! I'm glad I could provide helpful pediatric pulmonology information. Remember to always consult healthcare providers for personalized advice. Feel free to ask more questions!"

    # Help patterns
    help_only = ('help', 'what can you do', 'what are you', 'who are you')
    if normalized in help_only:
        return """π€ **About BioGPT Pediatric Pulmonology Assistant**
I'm an AI medical assistant powered by BioGPT, specialized in pediatric pulmonology and respiratory medicine. I can help with:
π« **Pediatric Pulmonology:**
β’ Asthma, bronchiolitis, pneumonia, croup
β’ Respiratory symptoms and breathing difficulties
β’ Treatment guidance and management
β’ When to seek medical care
β οΈ **Important:** I provide educational information only. Always consult healthcare professionals for medical decisions."""

    # Not small talk
    return None
def chat_interface(self, message: str, history: List[List[str]]) -> str:
    """Produce the reply for one chat turn.

    Flow: blank input gets a fixed welcome line; small talk is answered by
    ``handle_conversational_interactions``; everything else is looked up via
    ``retrieve_medical_context`` and, when context is found, answered by
    ``generate_biogpt_response`` with a disclaimer appended.

    Args:
        message: The user's message.
        history: Earlier turns passed in by the caller; not read here.

    Returns:
        The reply text to show in the chat.
    """
    if not message.strip():
        return "Hello! I'm BioGPT, your pediatric pulmonology AI assistant. How can I help you with children's respiratory health today?"

    print(f"π Processing query: '{message}'")

    # Small talk short-circuits the medical pipeline
    small_talk_reply = self.handle_conversational_interactions(message)
    if small_talk_reply:
        print(" Handled as conversational")
        return small_talk_reply

    print(" Processing as medical query")
    retrieved_chunks = self.retrieve_medical_context(message)

    if retrieved_chunks:
        # Retrieved chunks are joined with blank lines into one context string
        answer = self.generate_biogpt_response('\n\n'.join(retrieved_chunks), message)
        return f"π©Ί **BioGPT Medical Assistant:** {answer}\n\nβ οΈ **Important:** This information is for educational purposes only. Always consult qualified healthcare professionals for medical diagnosis, treatment, and personalized advice."

    # Nothing retrieved: fixed guidance message echoing the query
    return f"""π©Ί **Medical Query:** {message}
β οΈ I don't have specific information about this topic in my current medical database. However, I recommend:
1. **Consult Healthcare Provider**: For personalized medical advice
2. **Emergency Signs**: If symptoms are severe, seek immediate care
3. **Pediatric Specialist**: For specialized concerns
**For urgent medical concerns, contact your healthcare provider or emergency services.**
π‘ **Try asking about**: asthma, breathing difficulties, cough, pneumonia, or other respiratory symptoms."""
def get_knowledge_stats(self) -> Dict:
    """Summarise the loaded knowledge chunks.

    The same set of keys is returned whether or not any chunks are loaded,
    so callers can index every field without checking for emptiness first.

    Returns:
        A dict with:
        ``total_chunks`` - number of entries in ``self.knowledge_chunks``;
        ``default_knowledge`` - chunks whose ``source`` is ``'default_knowledge'``;
        ``pulmonology_file_data`` - chunks whose ``source`` is
        ``'pediatric_pulmonology_file'``;
        ``pulmonology_focused`` - chunks whose ``medical_focus`` is
        ``'pediatric_pulmonology'``;
        ``model_used`` - ``self.model_name`` or ``'Unknown'`` when unset.
    """
    # Treat a missing/empty knowledge base as an empty list so all counts are 0
    chunks = self.knowledge_chunks or []

    stats = {
        "total_chunks": len(chunks),
        "default_knowledge": sum(1 for c in chunks if c.get('source') == 'default_knowledge'),
        "pulmonology_file_data": sum(1 for c in chunks if c.get('source') == 'pediatric_pulmonology_file'),
        "pulmonology_focused": sum(1 for c in chunks if c.get('medical_focus') == 'pediatric_pulmonology'),
        "model_used": getattr(self, 'model_name', 'Unknown'),
    }
    return stats
# Test function
def test_chatbot_responses():
    """Send a fixed set of sample queries to the global chatbot and print the results.

    Each reply is labelled CONVERSATIONAL when it contains one of the marker
    phrases found in the canned small-talk replies, otherwise MEDICAL. Only
    the first 100 characters of each reply are printed.
    """
    print("\nπ§ͺ Testing BioGPT Pediatric Pulmonology Chatbot...")
    print("=" * 50)

    sample_queries = (
        "hello",
        "what is asthma in children",
        "my child has breathing difficulties",
        "help",
        "treatment for pediatric pneumonia",
        "thank you",
    )
    # Substrings that appear in the canned conversational replies
    conversational_markers = ('Hello!', 'welcome!', 'About BioGPT')

    for sample in sample_queries:
        print(f"\nπ Query: '{sample}'")
        reply = chatbot.chat_interface(sample, [])

        response_type = 'MEDICAL'
        for marker in conversational_markers:
            if marker in reply:
                response_type = 'CONVERSATIONAL'
                break

        print(f"π€ Response type: {response_type}")
        print(f"π Response: {reply[:100]}...")
        print("-" * 30)
# Build the single module-level chatbot instance used by everything below
print("π Initializing BioGPT Pediatric Pulmonology Chatbot...")
chatbot = BioGPTMedicalChatbot()

# Print one line per knowledge-base statistic
print("\nπ Knowledge Base Statistics:")
stats = chatbot.get_knowledge_stats()
for stat_name in stats:
    print(f" {stat_name}: {stats[stat_name]}")

# Exercise the chatbot with the sample queries at start-up
test_chatbot_responses()
def create_gradio_interface():
    """Build the Gradio Blocks UI around the module-level ``chatbot``.

    The page consists of a header banner with knowledge-base figures, a
    medical disclaimer, the chat widget bound to ``chatbot.chat_interface``,
    four information tabs and a footer.

    Returns:
        The ``gr.Blocks`` object (``demo``); the caller is responsible for
        launching it.
    """
    # Merge the reported stats over defaults so every key interpolated below
    # exists even if get_knowledge_stats() returns a reduced dict (e.g. only
    # "total_chunks" when nothing is loaded); avoids a KeyError while
    # building the page.
    stats = {
        "total_chunks": 0,
        "default_knowledge": 0,
        "pulmonology_file_data": 0,
        "pulmonology_focused": 0,
        "model_used": "Unknown",
        **chatbot.get_knowledge_stats(),
    }

    # Status line for the "Data Information" tab. Kept as single-line
    # literals outside the f-string: a quoted literal broken across lines
    # inside a replacement field does not parse.
    if stats['pulmonology_file_data'] > 0:
        status_line = "β **Status**: Pulmonology data file loaded successfully!"
    else:
        status_line = "β οΈ **Status**: No pulmonology data file detected. Upload 'Pediatric_cleaned.txt' to enhance responses."

    # Custom CSS for medical theme
    css = """
.gradio-container {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
}
.chat-message {
background-color: #f8f9fa;
border-radius: 10px;
padding: 10px;
margin: 5px;
}
"""

    with gr.Blocks(
        css=css,
        title="BioGPT Pediatric Pulmonology Assistant",
        theme=gr.themes.Soft()
    ) as demo:
        # Header
        gr.HTML(f"""
<div style="text-align: center; padding: 20px; background: linear-gradient(90deg, #667eea, #764ba2); color: white; border-radius: 10px; margin-bottom: 20px;">
<h1>π« BioGPT Pediatric Pulmonology Assistant</h1>
<p>Specialized AI Medical Chatbot for Children's Respiratory Health</p>
<p><strong>Powered by BioGPT | {stats['total_chunks']} Medical Knowledge Chunks Loaded</strong></p>
<p><small>Model: {stats['model_used']} | Pulmonology Data: {stats['pulmonology_file_data']} chunks</small></p>
</div>
""")

        # Important disclaimer
        gr.HTML("""
<div style="background-color: #fff3cd; border: 1px solid #ffeaa7; border-radius: 8px; padding: 15px; margin-bottom: 20px;">
<h3 style="color: #856404; margin-top: 0;">β οΈ Medical Disclaimer</h3>
<p style="color: #856404; margin-bottom: 0;">
This AI provides educational pediatric pulmonology information only and is NOT a substitute for professional medical advice,
diagnosis, or treatment. Always consult qualified healthcare providers for medical decisions.
<strong>In case of respiratory emergency, call emergency services immediately.</strong>
</p>
</div>
""")

        # Chat interface (created inside the Blocks context; the returned
        # object is not needed afterwards, so it is not bound to a name).
        # NOTE(review): retry_btn / undo_btn / clear_btn and bubble_full_width
        # are version-sensitive Gradio keyword arguments - confirm they are
        # accepted by the Gradio release this app is pinned to.
        gr.ChatInterface(
            fn=chatbot.chat_interface,
            title="π¬ Chat with BioGPT Pulmonology Assistant",
            description="Ask me about pediatric respiratory health, asthma, breathing difficulties, and pulmonology treatments.",
            examples=[
                "What is asthma in children?",
                "My child has a persistent cough, what should I do?",
                "How is bronchiolitis treated in infants?",
                "When should I be worried about my child's breathing?",
                "What are the signs of pneumonia in children?",
                "How can I prevent respiratory infections?"
            ],
            retry_btn=None,
            undo_btn=None,
            clear_btn="ποΈ Clear Chat",
            submit_btn="π« Ask BioGPT",
            chatbot=gr.Chatbot(
                height=500,
                placeholder="<div style='text-align: center; color: #666;'>Start a conversation with BioGPT Pediatric Pulmonology Assistant</div>",
                show_copy_button=True,
                bubble_full_width=False
            )
        )

        # Information tabs
        with gr.Tabs():
            with gr.Tab("βΉοΈ About"):
                gr.Markdown(f"""
## About BioGPT Pediatric Pulmonology Assistant
This AI assistant is powered by **BioGPT**, specialized for pediatric pulmonology and respiratory medicine.
### π― Current Knowledge Base:
- **Total Chunks**: {stats['total_chunks']}
- **Default Medical Knowledge**: {stats['default_knowledge']} chunks
- **Pulmonology File Data**: {stats['pulmonology_file_data']} chunks
- **Pulmonology Focus**: {stats['pulmonology_focused']} chunks
- **Model**: {stats['model_used']}
### π« Specializations:
- **Pediatric Asthma**: Diagnosis, treatment, management
- **Respiratory Infections**: Pneumonia, bronchiolitis, croup
- **Breathing Difficulties**: Assessment and guidance
- **Chronic Respiratory Conditions**: Long-term management
- **Emergency Respiratory Care**: When to seek immediate help
### π§ Technical Features:
- **Model**: Microsoft BioGPT (Medical AI) with fallback systems
- **Auto-Loading**: Automatically loads your pulmonology data file
- **Smart Retrieval**: Prioritizes pulmonology content
- **Rule-Based Fallback**: Ensures reliable responses even if AI model fails
### π± How to Use:
1. Type your pediatric respiratory question
2. Be specific about symptoms or conditions
3. Ask about treatments, diagnosis, or management
4. Request guidance on when to seek care
""")

            with gr.Tab("π« Pulmonology Topics"):
                gr.Markdown("""
## Pediatric Pulmonology Coverage
### π΄ Common Respiratory Conditions:
- **Asthma**: Triggers, symptoms, management, action plans
- **Bronchiolitis**: RSV, treatment, when to hospitalize
- **Pneumonia**: Bacterial vs viral, antibiotics, recovery
- **Croup**: Barking cough, stridor, home treatment
- **Bronchitis**: Acute vs chronic, treatment approaches
### π‘ Respiratory Symptoms:
- **Cough**: Persistent, productive, dry, nocturnal
- **Wheezing**: Causes, assessment, treatment
- **Shortness of Breath**: Evaluation and management
- **Chest Pain**: When concerning in children
- **Stridor**: Upper airway obstruction signs
### π’ Diagnostic & Treatment:
- **Pulmonary Function Tests**: When appropriate
- **Imaging**: X-rays, CT scans for respiratory issues
- **Medications**: Bronchodilators, steroids, antibiotics
- **Oxygen Therapy**: Indications and monitoring
- **Respiratory Support**: CPAP, ventilation considerations
### π΅ Prevention & Management:
- **Trigger Avoidance**: Environmental controls
- **Vaccination**: Respiratory disease prevention
- **Exercise Guidelines**: For children with respiratory conditions
- **School Management**: Asthma action plans, inhaler use
""")

            with gr.Tab("β οΈ Emergency & Safety"):
                gr.Markdown("""
## Respiratory Emergency Guidance
### π¨ CALL EMERGENCY SERVICES IMMEDIATELY:
- **Severe Breathing Difficulty**: Cannot speak in full sentences
- **Blue Lips or Fingernails**: Cyanosis indicating oxygen deprivation
- **Severe Wheezing**: With significant distress
- **Stridor at Rest**: High-pitched breathing sound
- **Unconsciousness**: Related to breathing problems
- **Severe Chest Retractions**: Pulling in around ribs/sternum
### π₯ SEEK IMMEDIATE MEDICAL CARE:
- **Persistent High Fever**: >104Β°F (40Β°C) with respiratory symptoms
- **Worsening Symptoms**: Despite treatment
- **Dehydration Signs**: With respiratory illness
- **Significant Behavior Changes**: Extreme lethargy, irritability
- **Inhaler Not Helping**: Asthma symptoms not responding
### π CONTACT HEALTHCARE PROVIDER:
- **New Respiratory Symptoms**: Lasting more than a few days
- **Chronic Cough**: Persisting beyond 2-3 weeks
- **Asthma Questions**: About medications or management
- **Fever with Cough**: Especially if productive
- **Exercise Limitations**: Due to breathing difficulties
### π HOME MONITORING:
- **Respiratory Rate**: Normal ranges by age
- **Oxygen Saturation**: If pulse oximeter available
- **Peak Flow**: For children with asthma
- **Symptom Tracking**: Using asthma diaries or apps
""")

            with gr.Tab("π Data Information"):
                gr.Markdown(f"""
## Knowledge Base Status
### π Current Data Loaded:
- **Total Medical Chunks**: {stats['total_chunks']}
- **Default Knowledge**: {stats['default_knowledge']} chunks
- **Your Pulmonology File**: {stats['pulmonology_file_data']} chunks
- **Pulmonology Focused**: {stats['pulmonology_focused']} chunks
- **AI Model**: {stats['model_used']}
### π How Your Data Is Used:
1. **Auto-Detection**: System automatically looks for 'Pediatric_cleaned.txt'
2. **Smart Processing**: Breaks your file into medical chunks
3. **Priority Ranking**: Pulmonology content gets highest priority
4. **Context Retrieval**: Relevant chunks are used to answer questions
5. **Fallback Systems**: Multiple layers ensure reliable responses
### π Expected File Formats:
- **Filename**: 'Pediatric_cleaned.txt' (case variations accepted)
- **Content**: Plain text with medical information
- **Structure**: Paragraphs, sections, or bullet points
- **Focus**: Pediatric pulmonology and respiratory medicine
### π Upload Instructions:
1. Go to your Hugging Face Space
2. Click "Files" tab
3. Upload your 'Pediatric_cleaned.txt' file
4. Restart the Space to reload data
{status_line}
### π‘οΈ Reliability Features:
- **Multiple Model Support**: Tries BioGPT, DialoGPT, GPT-2 in order
- **Rule-Based Fallback**: If all AI models fail, uses knowledge-based responses
- **Conservative Generation**: Prevents problematic outputs
- **Medical Context Priority**: Always uses your medical knowledge first
""")

        # Footer
        gr.HTML("""
<div style="text-align: center; padding: 20px; margin-top: 30px; border-top: 1px solid #ddd; color: #666;">
<p>π« <strong>BioGPT Pediatric Pulmonology Assistant</strong> | Powered by Microsoft BioGPT</p>
<p>Specialized in Children's Respiratory Health β’ Always consult healthcare professionals</p>
</div>
""")

    return demo
# Build the interface at import time so `demo` is available as a module attribute
demo = create_gradio_interface()

if __name__ == "__main__":
    # Serve on all network interfaces at port 7860 without a public share link
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    demo.launch(**launch_options)
|