Spaces:

erayman09
/

MedAI_Analyzer

Sleeping

App Files Community

erayman09 committed on Nov 30, 2024

Commit

9170bfd

verified ·

1 Parent(s): 3594567

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -12

app.py CHANGED Viewed

@@ -1,17 +1,20 @@
 import gradio as gr
 from transformers import pipeline
 from PIL import Image
 from PyPDF2 import PdfReader
 from reportlab.lib.pagesizes import letter
 from reportlab.pdfgen import canvas
-# Load Hugging Face OCR model
 ocr_model = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
-# Load BioGPT or another medical model
-medical_analyzer = pipeline("text-classification", model="microsoft/biogpt")
-# Function to extract text using Hugging Face OCR model
 def extract_text(file_path):
     try:
         # Handle PDFs
@@ -37,23 +40,28 @@ def create_pdf_report(analysis, output_path):
     c.save()
     return output_path
-# Function to analyze blood test reports
 def analyze_blood_test(file):
     try:
-        # Step 1: Extract text
         extracted_text = extract_text(file)
         if not extracted_text.strip():
             return "No readable text found in the uploaded file.", None
-        # Step 2: Analyze extracted text using BioGPT
-        analysis_results = medical_analyzer(extracted_text)
-        # Step 3: Generate analysis report
         analysis_report = "🔍 Analysis Results:\n"
-        for item in analysis_results:
-            analysis_report += f"- {item['label']}: {item['score']:.2f}\n"
-        # Step 4: Generate PDF report
         output_pdf = "analysis_report.pdf"
         create_pdf_report(f"Extracted Text:\n{extracted_text}\n\n{analysis_report}", output_pdf)

 import gradio as gr
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from transformers import pipeline
 from PIL import Image
 from PyPDF2 import PdfReader
 from reportlab.lib.pagesizes import letter
 from reportlab.pdfgen import canvas
+# Load Hugging Face OCR model for text extraction
 ocr_model = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
+# Load Bio_ClinicalBERT model for medical analysis
+model_name = "emilyalsentzer/Bio_ClinicalBERT"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForSequenceClassification.from_pretrained(model_name)
+# Function to extract text from images or PDFs
 def extract_text(file_path):
     try:
         # Handle PDFs
     c.save()
     return output_path
+# Function to analyze blood test reports using Bio_ClinicalBERT
 def analyze_blood_test(file):
     try:
+        # Step 1: Extract text from uploaded file
         extracted_text = extract_text(file)
         if not extracted_text.strip():
             return "No readable text found in the uploaded file.", None
+        # Step 2: Tokenize and pass through Bio_ClinicalBERT
+        inputs = tokenizer(extracted_text, return_tensors="pt", truncation=True, padding=True)
+        outputs = model(**inputs)
+        # Generate labels and scores
+        logits = outputs.logits
+        predictions = logits.softmax(dim=-1)
+        # Generate analysis report
         analysis_report = "🔍 Analysis Results:\n"
+        for i, score in enumerate(predictions[0]):
+            analysis_report += f"- {tokenizer.decode([i])}: {score.item():.2f}\n"
+        # Step 3: Generate a PDF report
         output_pdf = "analysis_report.pdf"
         create_pdf_report(f"Extracted Text:\n{extracted_text}\n\n{analysis_report}", output_pdf)