# app.py import gradio as gr import torch from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration, pipeline from gtts import gTTS import tempfile class AIDoctor: def __init__(self, model_id="RedHatAI/Qwen2.5-VL-7B-Instruct-quantized.w8a8"): self.device = "cpu" print("⚙️ Using device:", self.device) self.proc = AutoProcessor.from_pretrained(model_id, trust_remote_code=True) self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained( model_id, torch_dtype=torch.float32, trust_remote_code=True ).to(self.device) self.stt = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device=-1) def analyze(self, image, question): if image is None: return "Please upload a medical image." prompt = question.strip() or "Analyze this medical image for any abnormalities." inputs = self.proc(images=image, text=prompt, return_tensors="pt").to(self.device) outs = self.model.generate(**inputs, max_new_tokens=150, temperature=0.7) return self.proc.decode(outs[0], skip_special_tokens=True).strip() def tts(self, text): tts = gTTS(text=text, lang="en") path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name tts.save(path) return path def respond(self, image, audio, text): q = text.strip() if audio: res = self.stt(audio) q_upd = res.get("text", "").strip() if isinstance(res, dict) else str(res) if q_upd: q = q_upd resp = self.analyze(image, q) voice = self.tts(resp) return resp, voice, q doctor = AIDoctor() with gr.Blocks(title="🏥 AI Doctor (Qwen 2.5‑VL‑7B INT8)") as demo: gr.Markdown("## AI Doctor with **Qwen 2.5‑VL‑7B Instruct (quantized)**, CPU‑ready") with gr.Row(): img = gr.Image(label="Upload Medical Image", type="pil") aud = gr.Audio(label="Ask by Voice", type="filepath") txt = gr.Textbox(label="Ask by Text", lines=2) out_txt = gr.Textbox(label="AI Response", lines=10) out_aud = gr.Audio(label="AI Speaks", type="filepath") q_out = gr.Textbox(label="Processed Question", lines=1) btn = gr.Button("Ask Doctor") btn.click(fn=doctor.respond, inputs=[img, aud, txt], outputs=[out_txt, out_aud, q_out]) demo.launch()