"""Gradio chat demo: text-only messages are answered by a hosted
Mistral-7B-Instruct endpoint, while uploaded images are described locally by a
small Llava model with token-by-token streaming."""
import gradio as gr
from huggingface_hub import InferenceClient
from transformers import LlavaProcessor, LlavaForConditionalGeneration, TextIteratorStreamer
from PIL import Image
from threading import Thread
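# Assumed dependencies: gradio, huggingface_hub, transformers, torch, pillow.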

# Initialize model and processor
model_id = "llava-hf/llava-interleave-qwen-0.5b-hf"
processor = LlavaProcessor.from_pretrained(model_id)
model = LlavaForConditionalGeneration.from_pretrained(model_id).to("cpu")
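# Running on CPU keeps the demo portable but is slow; if a CUDA GPU is
# available, loading with torch_dtype=torch.float16 and .to("cuda") should be
# much faster (assumes a CUDA-enabled torch install).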

# Initialize the inference client for text-only chat
client_mistral = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
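# Note: this calls the hosted Hugging Face Inference API; depending on the
# deployment, an access token may be required (InferenceClient accepts token=...).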

def llava(inputs):
    """Process an image and a text prompt into model-ready tensors for Llava."""
    try:
        image = Image.open(inputs["files"][0]).convert("RGB")
        # The interleave-qwen checkpoint uses a ChatML-style prompt; the trailing
        # assistant tag tells the model to begin its reply.
        prompt = f"<|im_start|>user <image>\n{inputs['text']}<|im_end|><|im_start|>assistant"
        return processor(text=prompt, images=image, return_tensors="pt").to("cpu")
    except Exception as e:
        print(f"Error in llava function: {e}")
        return None
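# The message dicts passed to respond() below are expected to look like
# {"text": "...", "files": ["/path/to/image.png"]}, with "files" optional.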

def respond(message, history):
    """Generate a streamed response for text-only or image+text input."""
    try:
        if message.get("files"):
            # Image + text input: run the local Llava model.
            inputs = llava(message)
            if inputs is None:
                raise ValueError("Failed to process image input")

            history.append([message["text"], None])
            streamer = TextIteratorStreamer(
                processor.tokenizer, skip_prompt=True, skip_special_tokens=True
            )
            # generate() needs the tensors unpacked, not the BatchEncoding itself.
            thread = Thread(
                target=model.generate,
                kwargs=dict(**inputs, max_new_tokens=512, streamer=streamer),
            )
            thread.start()

            buffer = ""
            for new_text in streamer:
                buffer += new_text
                history[-1][1] = buffer
                yield history
        else:
            # Text-only input: delegate to the hosted Mistral endpoint.
            history.append([message["text"], None])
            # Rebuild the full conversation, including assistant turns,
            # so the endpoint sees the whole context.
            messages = []
            for user_msg, bot_msg in history:
                if user_msg:
                    messages.append({"role": "user", "content": user_msg})
                if bot_msg:
                    messages.append({"role": "assistant", "content": bot_msg})
            response = client_mistral.chat_completion(messages, max_tokens=200)
            history[-1][1] = response.choices[0].message.content
            yield history
    except Exception as e:
        print(f"Error in respond function: {e}")
        if not history:
            history.append([message.get("text", ""), None])
        history[-1][1] = f"An error occurred: {e}"
        yield history

# Set up Gradio interface
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(placeholder="Enter your message...")
            file_input = gr.File(label="Upload an image", type="filepath")

    def handle_text(text, history):
        """Handle text input and stream responses to the chatbot."""
        history = history or []
        # yield from makes this a generator, so Gradio streams updates.
        yield from respond({"text": text}, history)

    def handle_file_upload(filepath, history):
        """Handle an uploaded image and stream a description of it."""
        history = history or []
        if not filepath:
            yield history
            return
        yield from respond({"files": [filepath], "text": "Describe this image."}, history)

    # Connect components to callbacks
    text_input.submit(handle_text, [text_input, chatbot], [chatbot])
    file_input.change(handle_file_upload, [file_input, chatbot], [chatbot])

# Launch the Gradio app
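# Note: on older Gradio 3.x releases, generator callbacks only stream when the
# queue is enabled; if tokens arrive all at once, call demo.queue() first.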
demo.launch()