import gradio as gr
import os
import time
import spaces
import torch
import re
from threading import Thread
from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
from PIL import Image

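# flash-attn is installed at runtime; FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE skips compiling the CUDA extensions during install.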
import subprocess
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)

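# Load moondream2 at a pinned revision: bfloat16 weights on the GPU with flash attention enabled.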
model_id = "vikhyatk/moondream2"
revision = "2024-04-02"
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
moondream = AutoModelForCausalLM.from_pretrained(
    model_id, trust_remote_code=True, revision=revision,
    torch_dtype=torch.bfloat16, device_map={"": "cuda"},
    attn_implementation="flash_attention_2"
)
moondream.eval()


# Log like/dislike feedback on chatbot messages to the console
def print_like_dislike(x: gr.LikeData):
    print(x.index, x.value, x.liked)

def add_message(history, message):
    # Handle image and text input
    if message["files"]:
        for x in message["files"]:
            history.append(((x,), None))
    if message["text"] is not None:
        history.append((message["text"], None))
    return history, gr.MultimodalTextbox(value=None, interactive=False)


@spaces.GPU(duration=10)
def bot(history):
    # Reverse search through the last 5 messages for an image file
    last_five_messages = history[-5:]  # Get the last five messages
    image_path = None
    for message in reversed(last_five_messages):
        if isinstance(message[0], tuple) and isinstance(message[0][0], str):
            image_path = message[0][0]
            break

    if image_path:
        try:
            image = Image.open(image_path)  # Try to open the image using Pillow
            image_embeds = moondream.encode_image(image)
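            # The model's description is only printed to the server log;
            # the chat reply below just confirms that the image was loaded.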
            print(moondream.answer_question(image_embeds, "Describe this image.", tokenizer))
            response = f"Successfully loaded image from path: {image_path}"
        except IOError:
            response = "Failed to open image. Please check the image path or file permissions."
    elif isinstance(history[-1][0], str):
        response = "HOLA, it's a string"  # Handle text messages
    else:
        response = "**I can only process text messages and images. Please send some text or upload an image!**"

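    # Stream the reply into the last chat message one character at a time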
    history[-1][1] = ""
    for character in response:
        history[-1][1] += character
        yield history

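# Build the chat UI: a chatbot pane plus a multimodal textbox that accepts text and images.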
with gr.Blocks(theme="monochrome") as demo:
    chatbot = gr.Chatbot(
        [],
        elem_id="chatbot",
        bubble_full_width=False
    )

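    # Multimodal input box; on submit, add the user message, stream the bot reply, then re-enable the box.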
    chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Enter message or upload file...", show_label=False)
    chat_msg = chat_input.submit(add_message, inputs=[chatbot, chat_input], outputs=[chatbot, chat_input])
    bot_msg = chat_msg.then(bot, inputs=chatbot, outputs=chatbot, api_name="bot_response")
    bot_msg.then(lambda: gr.MultimodalTextbox(interactive=True), None, outputs=[chat_input])

    chatbot.like(print_like_dislike, None, None)

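# Queuing is required for the generator-based (streaming) bot responses.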
demo.queue()
demo.launch()