"""Gradio app: extract text from an uploaded report and score it with a BERT classifier.

The uploaded file (image or PDF) is converted to text, passed through a
sequence-classification model, and the per-class probabilities are returned
both as text and as a downloadable PDF.
"""

import os
import tempfile
import textwrap

import gradio as gr
import torch
from PIL import Image
from PyPDF2 import PdfReader
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

# Hugging Face image-to-text pipeline used for non-PDF uploads.
# NOTE(review): the checkpoint name indicates an image *captioning* model, not
# an OCR model -- confirm it really transcribes report text, or swap in a
# dedicated OCR checkpoint.
ocr_model = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")

# Bio_ClinicalBERT loaded with a sequence-classification head.
# NOTE(review): if this checkpoint ships no fine-tuned classification head, the
# scores reported below come from untrained weights -- confirm before relying
# on them.
model_name = "emilyalsentzer/Bio_ClinicalBERT"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Upper bound on tokens fed to the model; without an explicit bound,
# truncation depends on the tokenizer's own configured maximum.
_MAX_TOKENS = getattr(model.config, "max_position_embeddings", 512)

# PDF layout (points). Wrapping is a character-count approximation, not a
# measurement of rendered glyph widths.
_PDF_LEFT = 100
_PDF_TOP = 750
_PDF_BOTTOM = 50
_PDF_LEADING = 20
_PDF_WRAP_COLUMNS = 80


def _resolve_path(file):
    """Return a filesystem path for a path-like value or an object exposing ``.name``."""
    if isinstance(file, (str, os.PathLike)):
        return os.fspath(file)
    return file.name


def _read_text(file):
    """Extract text from a PDF or image upload, raising on any failure.

    Args:
        file: A path, or an object whose ``.name`` attribute is a path.

    Returns:
        The extracted text. PDF pages are joined with newlines so words on
        adjacent pages do not run together.
    """
    path = _resolve_path(file)

    if path.lower().endswith(".pdf"):
        reader = PdfReader(path)
        # Guard against pages for which no text is returned.
        pages = [page.extract_text() or "" for page in reader.pages]
        return "\n".join(pages).strip()

    # Anything that is not a PDF is treated as an image.
    with Image.open(path) as image:
        result = ocr_model(image)
    return result[0]["generated_text"]


def extract_text(file_path):
    """Extract text from an image or PDF.

    Args:
        file_path: A path, or an object whose ``.name`` attribute is a path.

    Returns:
        The extracted text, or a string starting with
        ``"Error processing the file: "`` if extraction failed. This function
        never raises.
    """
    try:
        return _read_text(file_path)
    except Exception as e:
        return f"Error processing the file: {e}"


def create_pdf_report(analysis, output_path):
    """Write ``analysis`` to a letter-size PDF at ``output_path``.

    Long lines are wrapped and a new page is started when the current one is
    full, so no text is drawn outside the page.

    Args:
        analysis: Report text; newlines separate lines.
        output_path: Destination file path for the PDF.

    Returns:
        ``output_path``, unchanged.
    """
    c = canvas.Canvas(output_path, pagesize=letter)
    c.drawString(_PDF_LEFT, 750, "Blood Test Report Analysis")
    c.drawString(_PDF_LEFT, 730, "---------------------------")

    y_position = 700
    for line in analysis.split("\n"):
        # ``wrap`` returns [] for blank input; keep blank lines as spacing.
        for piece in textwrap.wrap(line, width=_PDF_WRAP_COLUMNS) or [""]:
            if y_position < _PDF_BOTTOM:
                c.showPage()
                y_position = _PDF_TOP
            c.drawString(_PDF_LEFT, y_position, piece)
            y_position -= _PDF_LEADING

    c.save()
    return output_path


def analyze_blood_test(file):
    """Extract text from an upload, classify it, and build a PDF report.

    Args:
        file: The value Gradio passes for the ``gr.File`` input: a path, an
            object with a ``.name`` path attribute, or ``None`` when nothing
            was uploaded.

    Returns:
        A ``(message, pdf_path)`` tuple. ``pdf_path`` is ``None`` when no
        report could be produced, in which case ``message`` explains why.
    """
    if file is None:
        return "No file was uploaded.", None

    try:
        # Step 1: extract text. Failures raise here so an error message is
        # never fed to the model as if it were report content.
        extracted_text = _read_text(file)
        if not extracted_text.strip():
            return "No readable text found in the uploaded file.", None

        # Step 2: tokenize and classify.
        inputs = tokenizer(
            extracted_text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=_MAX_TOKENS,
        )
        with torch.no_grad():  # inference only; no gradients needed
            outputs = model(**inputs)
        predictions = outputs.logits.softmax(dim=-1)

        # Class indices are label ids, not vocabulary token ids, so names come
        # from the model config rather than from the tokenizer.
        id2label = model.config.id2label or {}
        report_lines = ["🔍 Analysis Results:"]
        for i, score in enumerate(predictions[0]):
            label = id2label.get(i, f"LABEL_{i}")
            report_lines.append(f"- {label}: {score.item():.2f}")
        analysis_report = "\n".join(report_lines) + "\n"

        # Step 3: write the PDF to a unique temp file so concurrent requests
        # do not overwrite each other's reports.
        with tempfile.NamedTemporaryFile(
            prefix="analysis_report_", suffix=".pdf", delete=False
        ) as tmp:
            output_pdf = tmp.name
        create_pdf_report(
            f"Extracted Text:\n{extracted_text}\n\n{analysis_report}", output_pdf
        )

        return analysis_report, output_pdf
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        return f"Error processing file: {e}", None


# Gradio interface setup
interface = gr.Interface(
    fn=analyze_blood_test,
    inputs=gr.File(label="Upload your Blood Test Report (PNG, JPG, JPEG, or PDF)"),
    outputs=[
        gr.Textbox(label="Analysis Results"),
        gr.File(label="Download PDF Report"),
    ],
    title="MedAI Analyzer",
    description=(
        "Upload your blood test report in image (PNG, JPG, JPEG) or PDF format. "
        "The app will extract and analyze the values, flag abnormalities, and provide health recommendations. "
        "You can also download a detailed PDF report of the analysis."
    ),
    allow_flagging="never",
)

if __name__ == "__main__":
    interface.launch()