import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline
from PIL import Image
from PyPDF2 import PdfReader
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas

# Load the Hugging Face "image-to-text" pipeline used by extract_text() for
# image uploads. This runs at import time and may download model weights.
# NOTE(review): the checkpoint name indicates a ViT-GPT2 image-*captioning*
# model rather than an OCR model, so it presumably describes the picture
# instead of transcribing the printed text — confirm, and consider swapping in
# an OCR checkpoint.
ocr_model = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")

# Load the Bio_ClinicalBERT tokenizer and a sequence-classification model built
# on the same checkpoint; both are used by analyze_blood_test().
# NOTE(review): presumably this checkpoint ships without a fine-tuned
# classification head, in which case the head is randomly initialised and the
# reported scores carry no clinical meaning — confirm before relying on them.
model_name = "emilyalsentzer/Bio_ClinicalBERT"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Function to extract text from images or PDFs
def extract_text(file_path):
    """Extract text from a PDF or an image file.

    Files whose name ends in ``.pdf`` (case-insensitive) are read with
    ``PdfReader``; every other file is opened as an image and passed to the
    module-level ``ocr_model`` pipeline.

    Args:
        file_path: Location of the file to read (``str`` or path-like).

    Returns:
        The extracted text. If anything fails, a message starting with
        ``"Error processing the file:"`` is returned instead; this function
        does not raise.
    """
    try:
        path = str(file_path)
        # Handle PDFs. Compare case-insensitively so "REPORT.PDF" is not
        # mistakenly sent down the image branch.
        if path.lower().endswith(".pdf"):
            reader = PdfReader(path)
            # Guard against pages that yield no text, and separate pages with
            # a newline so words at page boundaries do not run together.
            page_texts = (page.extract_text() or "" for page in reader.pages)
            return "\n".join(page_texts).strip()
        # Handle images. The context manager closes the underlying file
        # handle once the pipeline has consumed the image.
        with Image.open(path) as image:
            result = ocr_model(image)
        return result[0]['generated_text']
    except Exception as e:
        return f"Error processing the file: {e}"

# Helper: split one line of text so it fits the printable width
def _wrap_to_width(c, text, font_name, font_size, max_width):
    """Greedily split *text* into pieces no wider than *max_width* points.

    Text that already fits is returned untouched (original spacing kept). A
    single word wider than *max_width* is left on a line of its own.
    """
    if c.stringWidth(text, font_name, font_size) <= max_width:
        return [text]
    pieces = []
    current = ""
    for word in text.split():
        candidate = f"{current} {word}" if current else word
        if current and c.stringWidth(candidate, font_name, font_size) > max_width:
            pieces.append(current)
            current = word
        else:
            current = candidate
    pieces.append(current)
    return pieces


# Function to generate a PDF report
def create_pdf_report(analysis, output_path):
    """Write *analysis* to a letter-sized PDF at *output_path*.

    The first page starts with a fixed title and separator. The text is then
    drawn line by line; lines too wide for the page are wrapped and a new page
    is started whenever the bottom margin is reached, so long reports are no
    longer drawn off the page.

    Args:
        analysis: Report text; ``"\\n"`` separates lines.
        output_path: Destination path of the PDF file.

    Returns:
        ``output_path``, unchanged.
    """
    font_name, font_size = "Helvetica", 12
    left_margin, right_margin = 100, 72
    top_y, bottom_y, line_height = 750, 50, 20
    page_width, _ = letter
    max_width = page_width - left_margin - right_margin

    c = canvas.Canvas(output_path, pagesize=letter)
    c.setFont(font_name, font_size)
    c.drawString(left_margin, top_y, "Blood Test Report Analysis")
    c.drawString(left_margin, top_y - 20, "---------------------------")
    y_position = top_y - 50
    for line in analysis.split("\n"):
        for piece in _wrap_to_width(c, line, font_name, font_size, max_width):
            if y_position < bottom_y:
                # Page is full: finish it and continue at the top of a new
                # one. The font is set again for the new page.
                c.showPage()
                c.setFont(font_name, font_size)
                y_position = top_y
            c.drawString(left_margin, y_position, piece)
            y_position -= line_height
    c.save()
    return output_path

# Function to analyze blood test reports using Bio_ClinicalBERT
def analyze_blood_test(file):
    """Extract text from an uploaded report, score it and build a PDF.

    Args:
        file: The upload handed over by Gradio: either a path (``str`` or
            path-like) or a file-like object exposing the path as ``.name``.

    Returns:
        A ``(message, pdf_path)`` tuple. On success ``message`` is the
        analysis text and ``pdf_path`` the generated report. On any failure
        ``message`` describes the problem and ``pdf_path`` is ``None``; this
        function does not raise.

    NOTE(review): the scores come from the module-level classification model;
    presumably its head is not fine-tuned for this task — confirm before
    presenting the numbers as medically meaningful.
    """
    try:
        import os
        import tempfile

        # Uploads may arrive as a path or as a file-like object with `.name`.
        if isinstance(file, (str, os.PathLike)):
            file_path = file
        else:
            file_path = getattr(file, "name", None)
        if not file_path:
            return "No file was uploaded.", None

        # Step 1: Extract text from uploaded file
        extracted_text = extract_text(file_path)
        # extract_text() reports failures as a message instead of raising;
        # stop here rather than feeding that message to the model.
        if extracted_text.startswith("Error processing the file:"):
            return extracted_text, None
        if not extracted_text.strip():
            return "No readable text found in the uploaded file.", None

        # Step 2: Tokenize and pass through Bio_ClinicalBERT
        # Give truncation an explicit limit: if the tokenizer declares no
        # maximum length, truncation=True alone would not shorten long
        # reports and the model would fail on them.
        inputs = tokenizer(
            extracted_text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=getattr(model.config, "max_position_embeddings", 512),
        )
        import torch  # deferred so the cheap checks above run first

        # Inference only: skip gradient tracking.
        with torch.no_grad():
            outputs = model(**inputs)

        # Generate labels and scores
        predictions = outputs.logits.softmax(dim=-1)

        # Generate analysis report. Class names come from the model config;
        # decoding the class index with the tokenizer would return unrelated
        # vocabulary tokens.
        id2label = getattr(model.config, "id2label", None) or {}
        report_lines = ["🔍 Analysis Results:"]
        for i, score in enumerate(predictions[0]):
            label = id2label.get(i, f"LABEL_{i}")
            report_lines.append(f"- {label}: {score.item():.2f}")
        analysis_report = "\n".join(report_lines) + "\n"

        # Step 3: Generate a PDF report
        # Use a unique file per request so concurrent users do not overwrite
        # each other's report.
        with tempfile.NamedTemporaryFile(
            prefix="analysis_report_", suffix=".pdf", delete=False
        ) as tmp:
            output_pdf = tmp.name
        create_pdf_report(f"Extracted Text:\n{extracted_text}\n\n{analysis_report}", output_pdf)

        return analysis_report, output_pdf
    except Exception as e:
        return f"Error processing file: {e}", None

# Gradio interface setup
# The components are created first (input, then the two outputs) so the
# Interface call below reads as a plain summary of the app.
report_upload = gr.File(label="Upload your Blood Test Report (PNG, JPG, JPEG, or PDF)")
analysis_textbox = gr.Textbox(label="Analysis Results")
report_download = gr.File(label="Download PDF Report")

# Text shown under the title on the page.
# NOTE(review): this promises flagged abnormalities and health
# recommendations; analyze_blood_test() in this file only returns class scores
# — confirm the wording matches what the app delivers.
app_description = (
    "Upload your blood test report in image (PNG, JPG, JPEG) or PDF format. "
    "The app will extract and analyze the values, flag abnormalities, and provide health recommendations. "
    "You can also download a detailed PDF report of the analysis."
)

interface = gr.Interface(
    fn=analyze_blood_test,
    inputs=report_upload,
    outputs=[analysis_textbox, report_download],
    title="MedAI Analyzer",
    description=app_description,
    allow_flagging="never",
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    interface.launch()