import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline
from PIL import Image
from PyPDF2 import PdfReader
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas

# Load a Hugging Face image-to-text pipeline for extracting text from images.
# Note: nlpconnect/vit-gpt2-image-captioning is an image-captioning model, not a dedicated OCR
# model, so it returns a caption of the image rather than a faithful transcription.
ocr_model = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
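# An OCR-specific checkpoint (for example, microsoft/trocr-base-printed, which the same
# "image-to-text" pipeline can load) would likely transcribe printed lab reports more
# faithfully; the captioning model above is kept to match the original setup.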

# Load Bio_ClinicalBERT model for medical analysis
model_name = "emilyalsentzer/Bio_ClinicalBERT"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
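# Note: this base checkpoint has no fine-tuned classification head, so
# AutoModelForSequenceClassification initializes the head randomly and the scores produced
# below are illustrative only; a checkpoint fine-tuned on labeled lab-report data would be
# needed for medically meaningful output.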

# Function to extract text from images or PDFs
def extract_text(file_path):
    try:
        # Handle PDFs
        if file_path.endswith(".pdf"):
            reader = PdfReader(file_path)
            # extract_text() can return None for pages without a text layer
            text = "".join(page.extract_text() or "" for page in reader.pages)
            return text.strip()
        # Handle images
        result = ocr_model(Image.open(file_path))
        return result[0]["generated_text"]
    except Exception as e:
        return f"Error processing the file: {e}"

# Function to generate a PDF report
def create_pdf_report(analysis, output_path):
    c = canvas.Canvas(output_path, pagesize=letter)
    c.drawString(100, 750, "Blood Test Report Analysis")
    c.drawString(100, 730, "---------------------------")
    y_position = 700
    for line in analysis.split("\n"):
        # Start a new page before the text runs off the bottom margin
        if y_position < 50:
            c.showPage()
            y_position = 750
        c.drawString(100, y_position, line)
        y_position -= 20
    c.save()
    return output_path

# Function to analyze blood test reports using Bio_ClinicalBERT
def analyze_blood_test(file):
    try:
        # gr.File may hand over a filepath string or a temp-file wrapper, depending on the Gradio version
        file_path = file if isinstance(file, str) else file.name
        # Step 1: Extract text from the uploaded file
        extracted_text = extract_text(file_path)
        if not extracted_text.strip():
            return "No readable text found in the uploaded file.", None
        # Step 2: Tokenize and pass through Bio_ClinicalBERT (no gradients needed for inference)
        inputs = tokenizer(extracted_text, return_tensors="pt", truncation=True, padding=True)
        with torch.no_grad():
            outputs = model(**inputs)
        # Convert logits to class probabilities
        logits = outputs.logits
        predictions = logits.softmax(dim=-1)
        # Generate the analysis report using the model's label names (id2label), not decoded token ids
        analysis_report = "Analysis Results:\n"
        for i, score in enumerate(predictions[0]):
            label = model.config.id2label.get(i, f"LABEL_{i}")
            analysis_report += f"- {label}: {score.item():.2f}\n"
        # Step 3: Generate a PDF report
        output_pdf = "analysis_report.pdf"
        create_pdf_report(f"Extracted Text:\n{extracted_text}\n\n{analysis_report}", output_pdf)
        return analysis_report, output_pdf
    except Exception as e:
        return f"Error processing file: {e}", None

# Gradio interface setup
interface = gr.Interface(
    fn=analyze_blood_test,
    inputs=gr.File(label="Upload your Blood Test Report (PNG, JPG, JPEG, or PDF)"),
    outputs=[
        gr.Textbox(label="Analysis Results"),
        gr.File(label="Download PDF Report")
    ],
    title="MedAI Analyzer",
    description=(
        "Upload your blood test report in image (PNG, JPG, JPEG) or PDF format. "
        "The app extracts the text, scores it with Bio_ClinicalBERT, and generates "
        "a downloadable PDF report of the analysis."
    ),
    allow_flagging="never"
)

if __name__ == "__main__":
    interface.launch()
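
# Dependencies assumed in the Space's requirements.txt (all imported above):
# gradio, transformers, torch, Pillow, PyPDF2, reportlab.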