# app.py
import gradio as gr
import torch
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration, pipeline
from gtts import gTTS
import tempfile

class AIDoctor:
    def __init__(self,
                 model_id="RedHatAI/Qwen2.5-VL-7B-Instruct-quantized.w8a8"):
        self.device = "cpu"
        print("⚙️ Using device:", self.device)
        self.proc = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
        self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
            model_id,
            torch_dtype=torch.float32,
            trust_remote_code=True
        ).to(self.device)
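        # Whisper-tiny for speech-to-text; device=-1 keeps it on CPU.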
        self.stt = pipeline("automatic-speech-recognition",
                            model="openai/whisper-tiny",
                            device=-1)

    def analyze(self, image, question):
        if image is None:
            return "Please upload a medical image."
        prompt = question.strip() or "Analyze this medical image for any abnormalities."
        # Qwen2.5-VL expects its chat template so image placeholder tokens accompany the prompt.
        messages = [{"role": "user", "content": [
            {"type": "image"}, {"type": "text", "text": prompt}]}]
        text = self.proc.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        inputs = self.proc(images=[image], text=[text], return_tensors="pt").to(self.device)
        outs = self.model.generate(**inputs,
                                   max_new_tokens=150,
                                   do_sample=True,  # temperature is ignored without sampling
                                   temperature=0.7)
        # Decode only the newly generated tokens, not the echoed prompt.
        gen = outs[0][inputs["input_ids"].shape[1]:]
        return self.proc.decode(gen, skip_special_tokens=True).strip()

    def tts(self, text):
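        # Synthesize speech with gTTS; delete=False keeps the temp MP3 around so Gradio can serve it.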
        tts = gTTS(text=text, lang="en")
        path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
        tts.save(path)
        return path

    def respond(self, image, audio, text):
        q = text.strip()
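        # A transcribed voice question, when present, overrides the typed one.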
        if audio:
            res = self.stt(audio)
            q_upd = res.get("text", "").strip() if isinstance(res, dict) else str(res)
            if q_upd:
                q = q_upd
        resp = self.analyze(image, q)
        voice = self.tts(resp)
        return resp, voice, q

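# Load the models once at startup; the 7B checkpoint takes a while on CPU.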
doctor = AIDoctor()

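# UI: a medical image plus a voice or text question in; text and a spoken answer out.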
with gr.Blocks(title="🏥 AI Doctor (Qwen 2.5‑VL‑7B INT8)") as demo:
    gr.Markdown("## AI Doctor with **Qwen 2.5‑VL‑7B Instruct (quantized)**, CPU‑ready")
    with gr.Row():
        img = gr.Image(label="Upload Medical Image", type="pil")
        aud = gr.Audio(label="Ask by Voice", type="filepath")
    txt = gr.Textbox(label="Ask by Text", lines=2)
    out_txt = gr.Textbox(label="AI Response", lines=10)
    out_aud = gr.Audio(label="AI Speaks", type="filepath")
    q_out = gr.Textbox(label="Processed Question", lines=1)

    btn = gr.Button("Ask Doctor")
    btn.click(fn=doctor.respond,
              inputs=[img, aud, txt],
              outputs=[out_txt, out_aud, q_out])
demo.launch()