import gradio as gr
import torch
from PIL import Image
from transformers import (
    Qwen2VLForConditionalGeneration,
    AutoProcessor,
)
# Load model and processor - CPU version
MODEL_ID = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
model = Qwen2VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    torch_dtype=torch.float32,  # float32 for CPU compatibility
).to("cpu").eval()
def extract_medicines(image):
    """Extract medicine names from prescription images."""
    if image is None:
        return "Please upload a prescription image."

    # Open the image once and build the extraction prompt
    img = Image.open(image)
    text = (
        "Extract ONLY the names of medications/medicines from this prescription image. "
        "Format the output as a numbered list of medicine names only, "
        "without dosages or instructions."
    )
    messages = [{
        "role": "user",
        "content": [
            {"type": "image", "image": img},
            {"type": "text", "text": text},
        ],
    }]
    prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = processor(
        text=[prompt_full],
        images=[img],
        return_tensors="pt",
        padding=True,
    ).to("cpu")
    # Generate response
    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=512)

    # Trim the prompt tokens so only the newly generated answer is decoded
    # (the Qwen2-VL chat template has no "<|assistant|>" marker to split on)
    generated = output[:, inputs.input_ids.shape[1]:]
    response = processor.batch_decode(generated, skip_special_tokens=True)[0].strip()
    return response
# Create a simple Gradio interface
demo = gr.Interface(
    fn=extract_medicines,
    inputs=gr.Image(type="filepath", label="Upload Prescription Image"),
    outputs=gr.Textbox(label="Extracted Medicine Names"),
    title="Medicine Name Extractor",
    description="Upload prescription images to extract medicine names",
    examples=[["examples/prescription1.jpg"]],  # Update with your actual example paths or remove if not available
    cache_examples=True,
)
if __name__ == "__main__":
    demo.launch(debug=True)