Spaces:

erayman09
/

MedAI_Analyzer

Sleeping

File size: 3,332 Bytes

add57fc
9170bfd
add57fc
 
 
 
 
 
9170bfd
34f285a
add57fc
9170bfd
 
 
 
3594567
9170bfd
add57fc
34f285a
 
 
 
 
 
 
 
 
 
 
add57fc
 
 
 
 
 
 
 
 
34f285a
add57fc
 
 
9170bfd
add57fc
34f285a
9170bfd
34f285a
 
 
add57fc
9170bfd
 
 
3594567
9170bfd
 
 
 
 
34f285a
9170bfd
 
add57fc
9170bfd
34f285a
 
 
 
 
 
add57fc
 
 
 
34f285a
add57fc

import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline
from PIL import Image
from PyPDF2 import PdfReader
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas

# Load the Hugging Face "image-to-text" pipeline that extract_text() uses for
# image uploads. The model is downloaded/loaded once at import time.
# NOTE(review): going by its name, "nlpconnect/vit-gpt2-image-captioning" is an
# image *captioning* model, not an OCR model — confirm that it can actually
# read the printed text of a lab report before relying on its output.
ocr_model = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")

# Load the Bio_ClinicalBERT tokenizer and a sequence-classification model built
# from the same checkpoint; both are used by analyze_blood_test().
# NOTE(review): presumably this checkpoint ships without a fine-tuned
# classification head, in which case the predicted scores are not clinically
# meaningful — verify against the model card.
model_name = "emilyalsentzer/Bio_ClinicalBERT"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Function to extract text from images or PDFs
def extract_text(file_path):
    """Extract text from an uploaded PDF or image file.

    Files whose name ends in ``.pdf`` (any letter case) are read page by page
    with ``PdfReader``. Every other file is opened as an image and passed to
    the module-level ``ocr_model`` pipeline.

    Args:
        file_path: Path to the file as ``str`` or ``os.PathLike``, or an
            object that exposes the path through a ``name`` attribute (for
            example a temporary-file wrapper handed over by the UI layer).

    Returns:
        The extracted text. This function never raises: on any failure it
        returns a string starting with ``"Error processing the file: "``.
    """
    import os

    try:
        # Normalise the input to a plain path. PathLike objects are converted
        # explicitly because ``Path.name`` is only the final path component.
        if isinstance(file_path, (str, os.PathLike)):
            path = os.fspath(file_path)
        else:
            path = getattr(file_path, "name", file_path)

        # Handle PDFs (case-insensitive so that "REPORT.PDF" is not treated
        # as an image).
        if path.lower().endswith(".pdf"):
            reader = PdfReader(path)
            # Guard against pages that yield no text (None or empty) so the
            # join cannot fail on a non-string item.
            text = "".join(page.extract_text() or "" for page in reader.pages)
            return text.strip()

        # Handle images; the context manager closes the file handle once the
        # pipeline has consumed the image.
        with Image.open(path) as image:
            result = ocr_model(image)
        return result[0]['generated_text']
    except Exception as e:
        return f"Error processing the file: {e}"

# Function to generate a PDF report
def create_pdf_report(analysis, output_path):
    """Render ``analysis`` as a simple text PDF at ``output_path``.

    The first page starts with a fixed title and separator. The analysis text
    is then written line by line; lines that are too long are wrapped, and a
    new page is started whenever the current one is full, so long reports are
    no longer drawn outside the visible page area.

    Args:
        analysis: Report text; lines are separated by ``"\\n"``.
        output_path: Destination path of the PDF file.

    Returns:
        ``output_path``, unchanged.
    """
    import textwrap

    left_margin = 100
    top_y = 750
    bottom_y = 50
    line_height = 20
    # Approximate number of characters that fit between the left margin and
    # the right page edge with the canvas' default font.
    max_chars = 80

    c = canvas.Canvas(output_path, pagesize=letter)
    c.drawString(left_margin, top_y, "Blood Test Report Analysis")
    c.drawString(left_margin, top_y - line_height, "---------------------------")
    y_position = 700
    for line in analysis.split("\n"):
        # textwrap.wrap returns [] for blank lines; keep them as spacing.
        for segment in textwrap.wrap(line, width=max_chars) or [""]:
            if y_position < bottom_y:
                # Page is full: finish it and continue at the top of a new one.
                c.showPage()
                y_position = top_y
            c.drawString(left_margin, y_position, segment)
            y_position -= line_height
    c.save()
    return output_path

# Function to analyze blood test reports using Bio_ClinicalBERT
def analyze_blood_test(file):
    """Extract text from an uploaded report, classify it and build a PDF.

    Steps:
        1. Extract text from the upload via ``extract_text``.
        2. Run the text through the module-level ``tokenizer`` and ``model``
           and turn the logits into per-class probabilities.
        3. Write the extracted text plus the scores to a PDF report.

    Args:
        file: The uploaded file as provided by the Gradio ``File`` input, or
            ``None`` when nothing was uploaded.

    Returns:
        A ``(message, pdf_path)`` tuple. ``message`` is the analysis text or
        an error/notice; ``pdf_path`` is the generated report path, or
        ``None`` when no report could be produced.
    """
    # Nothing was uploaded: there is nothing to analyze.
    if file is None:
        return "No file uploaded.", None

    # Local imports keep this block self-contained; torch is already required
    # by the "pt" tensors requested from the tokenizer below.
    import tempfile

    import torch

    try:
        # Step 1: Extract text from uploaded file
        extracted_text = extract_text(file)
        if not extracted_text or not extracted_text.strip():
            return "No readable text found in the uploaded file.", None
        # extract_text reports failures as a message instead of raising;
        # surface that message rather than classifying it as report content.
        if extracted_text.startswith("Error processing the file:"):
            return extracted_text, None

        # Step 2: Tokenize and pass through Bio_ClinicalBERT
        inputs = tokenizer(extracted_text, return_tensors="pt", truncation=True, padding=True)
        with torch.no_grad():  # inference only, no gradients needed
            outputs = model(**inputs)
        predictions = outputs.logits.softmax(dim=-1)

        # Generate analysis report. Class indices are mapped through the
        # model's own label table; they are not vocabulary token IDs, so
        # decoding them with the tokenizer would print unrelated tokens.
        id2label = getattr(model.config, "id2label", None) or {}
        report_lines = ["🔍 Analysis Results:"]
        for i, score in enumerate(predictions[0]):
            label = id2label.get(i, f"LABEL_{i}")
            report_lines.append(f"- {label}: {score.item():.2f}")
        analysis_report = "\n".join(report_lines) + "\n"

        # Step 3: Generate a PDF report. A unique temporary file is used so
        # that concurrent requests cannot overwrite each other's report.
        with tempfile.NamedTemporaryFile(
            prefix="analysis_report_", suffix=".pdf", delete=False
        ) as tmp:
            output_pdf = tmp.name
        create_pdf_report(f"Extracted Text:\n{extracted_text}\n\n{analysis_report}", output_pdf)

        return analysis_report, output_pdf
    except Exception as e:
        return f"Error processing file: {e}", None

# Gradio interface setup: a single file upload goes to analyze_blood_test,
# whose (text, pdf_path) return value feeds the two output components below
# in order.
# NOTE(review): the description promises flagging of abnormal values and
# health recommendations, but analyze_blood_test in this file only reports
# classifier scores — confirm the wording matches what the app really does.
interface = gr.Interface(
    fn=analyze_blood_test,
    inputs=gr.File(label="Upload your Blood Test Report (PNG, JPG, JPEG, or PDF)"),
    outputs=[
        gr.Textbox(label="Analysis Results"),
        gr.File(label="Download PDF Report")
    ],
    title="MedAI Analyzer",
    description=(
        "Upload your blood test report in image (PNG, JPG, JPEG) or PDF format. "
        "The app will extract and analyze the values, flag abnormalities, and provide health recommendations. "
        "You can also download a detailed PDF report of the analysis."
    ),
    # Flagging is turned off.
    # NOTE(review): newer Gradio releases may expect a differently named
    # argument here — confirm against the pinned Gradio version.
    allow_flagging="never"
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()