File size: 2,324 Bytes
5dbe551
97b296f
beecb06
c7cc8ee
 
beecb06
 
 
 
 
5dbe551
c7cc8ee
beecb06
 
 
 
 
 
 
ade4954
c7cc8ee
beecb06
c7cc8ee
beecb06
ade4954
c7cc8ee
beecb06
ade4954
beecb06
 
 
c7cc8ee
beecb06
 
 
 
 
ade4954
beecb06
c7cc8ee
ade4954
beecb06
 
 
c7cc8ee
 
 
ade4954
c7cc8ee
 
ade4954
c7cc8ee
 
 
ade4954
c7cc8ee
5dbe551
c7cc8ee
 
 
 
 
 
 
 
 
 
5dbe551
c7cc8ee
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import time
from pathlib import Path
from threading import Thread

import gradio as gr
import torch
from PIL import Image
from transformers import (
    Qwen2VLForConditionalGeneration,
    AutoProcessor,
    TextIteratorStreamer,
)

# Model setup (CPU-only): the checkpoint and its processor are loaded once,
# at import time, and shared by every request.
MODEL_ID = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"

processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)

# float32 weights are requested for CPU compatibility.
model = Qwen2VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float32,
    trust_remote_code=True,
)
model = model.to("cpu")
model = model.eval()  # inference mode for layers such as dropout

def extract_medicines(image) -> str:
    """Extract medicine names from a prescription image.

    Args:
        image: Filesystem path of the uploaded prescription image, or
            ``None`` / an empty string when nothing was uploaded.

    Returns:
        The text generated by the model (the prompt asks for a numbered list
        of medicine names), or a short instruction message when no image was
        supplied.
    """
    if not image:
        return "Please upload a prescription image."

    # Open the file exactly once and release the handle immediately:
    # ``convert`` returns an in-memory copy that outlives the ``with`` block.
    # RGB also normalises palette / alpha / grayscale uploads.
    with Image.open(image) as image_file:
        picture = image_file.convert("RGB")

    text = (
        "Extract ONLY the names of medications/medicines from this prescription image. "
        "Format the output as a numbered list of medicine names only, "
        "without dosages or instructions."
    )

    messages = [{
        "role": "user",
        "content": [
            {"type": "image", "image": picture},
            {"type": "text", "text": text},
        ],
    }]

    prompt_full = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = processor(
        text=[prompt_full],
        images=[picture],
        return_tensors="pt",
        padding=True,
    ).to("cpu")

    # Generate response
    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=512)

    # ``generate`` returns the prompt tokens followed by the completion.
    # Drop the prompt by position so only the model's answer is decoded;
    # searching the decoded text for a role marker is unreliable because
    # special tokens are stripped during decoding.
    prompt_length = inputs["input_ids"].shape[1]
    answer_ids = output[:, prompt_length:]
    response = processor.batch_decode(
        answer_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )[0]

    return response.strip()

# Create a simple Gradio interface
# Only offer example images that actually exist on disk. With example caching
# enabled Gradio runs ``extract_medicines`` on every example at startup, so a
# missing file would otherwise abort the launch.
EXAMPLE_PATHS = ["examples/prescription1.jpg"]  # Update with your actual example paths
available_examples = [[path] for path in EXAMPLE_PATHS if Path(path).is_file()]

demo = gr.Interface(
    fn=extract_medicines,
    inputs=gr.Image(type="filepath", label="Upload Prescription Image"),
    outputs=gr.Textbox(label="Extracted Medicine Names"),
    title="Medicine Name Extractor",
    description="Upload prescription images to extract medicine names",
    examples=available_examples or None,  # None hides the examples panel
    cache_examples=bool(available_examples),  # nothing to cache without examples
)

if __name__ == "__main__":
    demo.launch(debug=True)