MedAI / app.py
riteshkokam's picture
Update app.py
306dc43 verified
# app.py
import gradio as gr
import torch
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration, pipeline
from gtts import gTTS
import tempfile
class AIDoctor:
def __init__(self,
model_id="RedHatAI/Qwen2.5-VL-7B-Instruct-quantized.w8a8"):
self.device = "cpu"
print("⚙️ Using device:", self.device)
self.proc = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
model_id,
torch_dtype=torch.float32,
trust_remote_code=True
).to(self.device)
self.stt = pipeline("automatic-speech-recognition",
model="openai/whisper-tiny",
device=-1)
def analyze(self, image, question):
if image is None:
return "Please upload a medical image."
prompt = question.strip() or "Analyze this medical image for any abnormalities."
inputs = self.proc(images=image, text=prompt, return_tensors="pt").to(self.device)
outs = self.model.generate(**inputs,
max_new_tokens=150,
temperature=0.7)
return self.proc.decode(outs[0], skip_special_tokens=True).strip()
def tts(self, text):
tts = gTTS(text=text, lang="en")
path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
tts.save(path)
return path
def respond(self, image, audio, text):
q = text.strip()
if audio:
res = self.stt(audio)
q_upd = res.get("text", "").strip() if isinstance(res, dict) else str(res)
if q_upd:
q = q_upd
resp = self.analyze(image, q)
voice = self.tts(resp)
return resp, voice, q
doctor = AIDoctor()
with gr.Blocks(title="🏥 AI Doctor (Qwen 2.5‑VL‑7B INT8)") as demo:
gr.Markdown("## AI Doctor with **Qwen 2.5‑VL‑7B Instruct (quantized)**, CPU‑ready")
with gr.Row():
img = gr.Image(label="Upload Medical Image", type="pil")
aud = gr.Audio(label="Ask by Voice", type="filepath")
txt = gr.Textbox(label="Ask by Text", lines=2)
out_txt = gr.Textbox(label="AI Response", lines=10)
out_aud = gr.Audio(label="AI Speaks", type="filepath")
q_out = gr.Textbox(label="Processed Question", lines=1)
btn = gr.Button("Ask Doctor")
btn.click(fn=doctor.respond,
inputs=[img, aud, txt],
outputs=[out_txt, out_aud, q_out])
demo.launch()