Spaces:
Sleeping
Sleeping
File size: 3,332 Bytes
add57fc 9170bfd add57fc 9170bfd 34f285a add57fc 9170bfd 3594567 9170bfd add57fc 34f285a add57fc 34f285a add57fc 9170bfd add57fc 34f285a 9170bfd 34f285a add57fc 9170bfd 3594567 9170bfd 34f285a 9170bfd add57fc 9170bfd 34f285a add57fc 34f285a add57fc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline
from PIL import Image
from PyPDF2 import PdfReader
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
# Load the Hugging Face "image-to-text" pipeline that is applied to uploaded images.
# NOTE(review): the checkpoint name says "image-captioning"; it presumably
# describes a picture rather than transcribing printed text — confirm it is
# suitable as an OCR engine for scanned lab reports.
ocr_model = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
# Load the Bio_ClinicalBERT tokenizer and a sequence-classification model
# built from the same checkpoint name.
# NOTE(review): presumably this checkpoint ships without a fine-tuned
# classification head, in which case the head is freshly initialised and the
# scores are not clinically meaningful — confirm before relying on the output.
model_name = "emilyalsentzer/Bio_ClinicalBERT"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
# Function to extract text from images or PDFs
def extract_text(file_path):
    """Extract text from an uploaded PDF or image file.

    Files whose name ends in ``.pdf`` (any letter case) are read with
    ``PdfReader`` and the text of all pages is joined with newlines. Every
    other file is opened as an image and passed to ``ocr_model``.

    Args:
        file_path: Location of the file, either as ``str`` / ``os.PathLike``
            or as a file-like object that exposes its path as ``.name``
            (assumption: some Gradio versions pass such an object — confirm
            against the installed version).

    Returns:
        The extracted text. On any failure the error is reported in-band as
        a string starting with ``"Error processing the file: "`` instead of
        being raised.
    """
    import os

    try:
        # Normalise the argument to a plain path string.
        if isinstance(file_path, (str, os.PathLike)):
            path = os.fspath(file_path)
        else:
            path = getattr(file_path, "name", file_path)

        # Handle PDFs (case-insensitive so "REPORT.PDF" is not sent to the OCR path)
        if path.lower().endswith(".pdf"):
            reader = PdfReader(path)
            # Defensive: treat a page with no extractable text as empty so the
            # join cannot fail, and separate pages so words do not run together.
            page_texts = [page.extract_text() or "" for page in reader.pages]
            return "\n".join(page_texts).strip()

        # Handle images; the context manager releases the file handle afterwards.
        with Image.open(path) as image:
            result = ocr_model(image)
        return result[0]['generated_text']
    except Exception as e:
        return f"Error processing the file: {e}"
# Function to generate a PDF report
def create_pdf_report(analysis, output_path):
    """Render *analysis* as a letter-size PDF and return its path.

    A two-line title is drawn at the top of the first page, followed by the
    analysis text one line at a time. Long lines are wrapped and a new page
    is started whenever the cursor reaches the bottom margin, so no text is
    drawn outside the visible page area.

    Args:
        analysis: Report text; lines are separated by ``"\\n"``.
        output_path: Destination file name for the PDF.

    Returns:
        ``output_path``, unchanged.
    """
    import textwrap

    left_margin = 100
    top_y = 750
    bottom_y = 50
    line_height = 20
    # Approximate character budget for the default font between the left
    # margin and the right page edge; deliberately conservative.
    max_chars = 70

    c = canvas.Canvas(output_path, pagesize=letter)
    c.drawString(left_margin, top_y, "Blood Test Report Analysis")
    c.drawString(left_margin, top_y - 20, "---------------------------")
    y_position = 700
    for line in analysis.split("\n"):
        # textwrap.wrap returns [] for blank lines; keep them as vertical spacing.
        for segment in textwrap.wrap(line, width=max_chars) or [""]:
            if y_position < bottom_y:
                # Page is full: finish it and continue at the top of a new one.
                c.showPage()
                y_position = top_y
            c.drawString(left_margin, y_position, segment)
            y_position -= line_height
    c.save()
    return output_path
# Function to analyze blood test reports using Bio_ClinicalBERT
def analyze_blood_test(file):
    """Run the extract -> classify -> PDF pipeline for one uploaded file.

    Args:
        file: The uploaded file as delivered by the Gradio ``File`` input,
            or ``None`` when nothing was uploaded.

    Returns:
        A ``(report_text, pdf_path)`` tuple. ``pdf_path`` is ``None`` whenever
        no report could be produced, in which case ``report_text`` carries
        the reason. Exceptions are caught and reported in ``report_text``.
    """
    if file is None:
        return "No file uploaded.", None
    try:
        import tempfile

        import torch

        # Step 1: Extract text from uploaded file
        extracted_text = extract_text(file)
        if not extracted_text or not extracted_text.strip():
            return "No readable text found in the uploaded file.", None
        # extract_text reports failures in-band; surface them instead of
        # classifying the error message as if it were report content.
        if extracted_text.startswith("Error processing the file:"):
            return extracted_text, None

        # Step 2: Tokenize and pass through Bio_ClinicalBERT
        # Cap the sequence length explicitly so truncation really happens even
        # if the tokenizer does not declare a usable maximum length.
        max_length = min(
            tokenizer.model_max_length,
            getattr(model.config, "max_position_embeddings", 512),
        )
        inputs = tokenizer(
            extracted_text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=max_length,
        )
        with torch.no_grad():  # inference only, no gradients needed
            outputs = model(**inputs)

        # Generate labels and scores
        predictions = outputs.logits.softmax(dim=-1)
        # Class names come from the model config; decoding the class index as
        # a vocabulary token id would print unrelated tokens.
        id2label = getattr(model.config, "id2label", None) or {}

        # Generate analysis report
        # NOTE(review): the leading character of the heading looks like a
        # mis-encoded symbol; kept unchanged — confirm the intended glyph.
        report_lines = ["π Analysis Results:"]
        for i, score in enumerate(predictions[0]):
            label = id2label.get(i, f"LABEL_{i}")
            report_lines.append(f"- {label}: {score.item():.2f}")
        analysis_report = "\n".join(report_lines) + "\n"

        # Step 3: Generate a PDF report
        # A unique file per request keeps one upload's report from
        # overwriting another's.
        with tempfile.NamedTemporaryFile(
            prefix="analysis_report_", suffix=".pdf", delete=False
        ) as tmp:
            output_pdf = tmp.name
        create_pdf_report(f"Extracted Text:\n{extracted_text}\n\n{analysis_report}", output_pdf)
        return analysis_report, output_pdf
    except Exception as e:
        return f"Error processing file: {e}", None
# Gradio interface setup: one file upload in, a text summary and a
# downloadable PDF out, all handled by analyze_blood_test.
_report_upload = gr.File(label="Upload your Blood Test Report (PNG, JPG, JPEG, or PDF)")
_result_widgets = [
    gr.Textbox(label="Analysis Results"),
    gr.File(label="Download PDF Report"),
]
# NOTE(review): this text promises abnormality flagging and health
# recommendations; confirm the analysis function actually delivers them.
_app_description = (
    "Upload your blood test report in image (PNG, JPG, JPEG) or PDF format. "
    "The app will extract and analyze the values, flag abnormalities, and provide health recommendations. "
    "You can also download a detailed PDF report of the analysis."
)

interface = gr.Interface(
    fn=analyze_blood_test,
    inputs=_report_upload,
    outputs=_result_widgets,
    title="MedAI Analyzer",
    description=_app_description,
    allow_flagging="never",
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()
|