erayman09 committed on
Commit
9170bfd
·
verified ·
1 Parent(s): 3594567

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -12
app.py CHANGED
@@ -1,17 +1,20 @@
1
  import gradio as gr
 
2
  from transformers import pipeline
3
  from PIL import Image
4
  from PyPDF2 import PdfReader
5
  from reportlab.lib.pagesizes import letter
6
  from reportlab.pdfgen import canvas
7
 
8
- # Load Hugging Face OCR model
9
  ocr_model = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
10
 
11
- # Load BioGPT or another medical model
12
- medical_analyzer = pipeline("text-classification", model="microsoft/biogpt")
 
 
13
 
14
- # Function to extract text using Hugging Face OCR model
15
  def extract_text(file_path):
16
  try:
17
  # Handle PDFs
@@ -37,23 +40,28 @@ def create_pdf_report(analysis, output_path):
37
  c.save()
38
  return output_path
39
 
40
- # Function to analyze blood test reports
41
  def analyze_blood_test(file):
42
  try:
43
- # Step 1: Extract text
44
  extracted_text = extract_text(file)
45
  if not extracted_text.strip():
46
  return "No readable text found in the uploaded file.", None
47
 
48
- # Step 2: Analyze extracted text using BioGPT
49
- analysis_results = medical_analyzer(extracted_text)
 
50
 
51
- # Step 3: Generate analysis report
 
 
 
 
52
  analysis_report = "πŸ” Analysis Results:\n"
53
- for item in analysis_results:
54
- analysis_report += f"- {item['label']}: {item['score']:.2f}\n"
55
 
56
- # Step 4: Generate PDF report
57
  output_pdf = "analysis_report.pdf"
58
  create_pdf_report(f"Extracted Text:\n{extracted_text}\n\n{analysis_report}", output_pdf)
59
 
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  from transformers import pipeline
4
  from PIL import Image
5
  from PyPDF2 import PdfReader
6
  from reportlab.lib.pagesizes import letter
7
  from reportlab.pdfgen import canvas
8
 
9
+ # Load Hugging Face OCR model for text extraction
10
  ocr_model = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
11
 
12
+ # Load Bio_ClinicalBERT model for medical analysis
13
+ model_name = "emilyalsentzer/Bio_ClinicalBERT"
14
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
15
+ model = AutoModelForSequenceClassification.from_pretrained(model_name)
16
 
17
+ # Function to extract text from images or PDFs
18
  def extract_text(file_path):
19
  try:
20
  # Handle PDFs
 
40
  c.save()
41
  return output_path
42
 
43
+ # Function to analyze blood test reports using Bio_ClinicalBERT
44
  def analyze_blood_test(file):
45
  try:
46
+ # Step 1: Extract text from uploaded file
47
  extracted_text = extract_text(file)
48
  if not extracted_text.strip():
49
  return "No readable text found in the uploaded file.", None
50
 
51
+ # Step 2: Tokenize and pass through Bio_ClinicalBERT
52
+ inputs = tokenizer(extracted_text, return_tensors="pt", truncation=True, padding=True)
53
+ outputs = model(**inputs)
54
 
55
+ # Generate labels and scores
56
+ logits = outputs.logits
57
+ predictions = logits.softmax(dim=-1)
58
+
59
+ # Generate analysis report
60
  analysis_report = "πŸ” Analysis Results:\n"
61
+ for i, score in enumerate(predictions[0]):
62
+ analysis_report += f"- {tokenizer.decode([i])}: {score.item():.2f}\n"
63
 
64
+ # Step 3: Generate a PDF report
65
  output_pdf = "analysis_report.pdf"
66
  create_pdf_report(f"Extracted Text:\n{extracted_text}\n\n{analysis_report}", output_pdf)
67