# Inference using Gradio
from peft import PeftModel
from transformers import Qwen2VLForConditionalGeneration
from transformers import AutoProcessor
import gradio as gr
from qwen_vl_utils import process_vision_info

# Load the base model and the fine-tuned adapter
base_model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
model = PeftModel.from_pretrained(base_model, "vignesha7/qwen2-2b-instruct-Brain-MRI-Description")
processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")

# Inference function: takes a PIL image and returns the generated description
def generate_description(sample):
    system_message = "You are an expert MRI radiographer. You can describe what you see in the MRI image."
    prompt = "Describe accurately what you see in this radiology image."
    messages = [
        {
            "role": "system",
            "content": [{"type": "text", "text": system_message}],
        },
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image", "image": sample},
            ],
        },
    ]

    # Preparation for inference: build the chat prompt and collect the vision inputs
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )
    inputs = inputs.to(model.device)

    # Inference: generate the output and strip the prompt tokens before decoding
    generated_ids = model.generate(
        **inputs, max_new_tokens=256, top_p=1.0, do_sample=True, temperature=0.8
    )
    generated_ids_trimmed = [
        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    output_text = processor.batch_decode(
        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )
    return output_text[0]

### Gradio app ###
title = "BrainMRI Radiology Expert"
description = "A Qwen2-VL-2B-Instruct model fine-tuned on brain MRI images. Describes the brain image."
demo = gr.Interface(
    fn=generate_description,
    inputs=gr.Image(type="pil"),
    outputs="text",
    title=title,
    description=description,
)
demo.launch()
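
# Optional variant (a sketch, not part of the original script): the loading code above
# keeps the model on CPU in full precision. If a GPU is available, generation is usually
# much faster with half precision and device placement; the torch_dtype/device_map
# settings and the adapter merge below are assumptions, not settings from the original.
#
# import torch
#
# base_model = Qwen2VLForConditionalGeneration.from_pretrained(
#     "Qwen/Qwen2-VL-2B-Instruct",
#     torch_dtype=torch.float16,  # half precision to reduce GPU memory use
#     device_map="auto",          # place the model on the available GPU(s)
# )
# model = PeftModel.from_pretrained(base_model, "vignesha7/qwen2-2b-instruct-Brain-MRI-Description")
# model = model.merge_and_unload()  # fold the LoRA adapter into the base weights
#
# With this variant, `inputs.to(model.device)` in generate_description() moves the
# inputs to the GPU automatically, so the rest of the script works unchanged.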